lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <49c8b59f-b281-6248-36ac-fe519bd4a9a2@solarflare.com>
Date:   Thu, 15 Jun 2017 15:36:44 +0100
From:   Edward Cree <ecree@...arflare.com>
To:     <davem@...emloft.net>,
        Alexei Starovoitov <alexei.starovoitov@...il.com>,
        Alexei Starovoitov <ast@...com>,
        Daniel Borkmann <daniel@...earbox.net>
CC:     <netdev@...r.kernel.org>,
        iovisor-dev <iovisor-dev@...ts.iovisor.org>
Subject: [RFC PATCH v2 net-next 02/10] bpf/verifier: rework value tracking

Tracks value alignment by means of tracking known & unknown bits.
Tightens some min/max value checks and fixes a couple of bugs therein.
If pointer leaks are allowed, and adjust_ptr_min_max_vals returns -EACCES,
 treat the pointer as an unknown scalar and try again, because we might be
 able to conclude something about the result (e.g. pointer & 0x40 is either
 0 or 0x40).

Signed-off-by: Edward Cree <ecree@...arflare.com>
---
 include/linux/bpf.h          |   34 +-
 include/linux/bpf_verifier.h |   40 +-
 include/linux/tnum.h         |   79 ++
 kernel/bpf/Makefile          |    2 +-
 kernel/bpf/tnum.c            |  163 ++++
 kernel/bpf/verifier.c        | 1692 ++++++++++++++++++++++++------------------
 6 files changed, 1235 insertions(+), 775 deletions(-)
 create mode 100644 include/linux/tnum.h
 create mode 100644 kernel/bpf/tnum.c

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 1bcbf0a..7c01ff2 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -116,35 +116,25 @@ enum bpf_access_type {
 };
 
 /* types of values stored in eBPF registers */
+/* Pointer types represent:
+ * pointer
+ * pointer + imm
+ * pointer + (u16) var
+ * pointer + (u16) var + imm
+ * if (range > 0) then [ptr, ptr + range - off) is safe to access
+ * if (id > 0) means that some 'var' was added
+ * if (off > 0) means that 'imm' was added
+ */
 enum bpf_reg_type {
 	NOT_INIT = 0,		 /* nothing was written into register */
-	UNKNOWN_VALUE,		 /* reg doesn't contain a valid pointer */
+	SCALAR_VALUE,		 /* reg doesn't contain a valid pointer */
 	PTR_TO_CTX,		 /* reg points to bpf_context */
 	CONST_PTR_TO_MAP,	 /* reg points to struct bpf_map */
 	PTR_TO_MAP_VALUE,	 /* reg points to map element value */
 	PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */
-	FRAME_PTR,		 /* reg == frame_pointer */
-	PTR_TO_STACK,		 /* reg == frame_pointer + imm */
-	CONST_IMM,		 /* constant integer value */
-
-	/* PTR_TO_PACKET represents:
-	 * skb->data
-	 * skb->data + imm
-	 * skb->data + (u16) var
-	 * skb->data + (u16) var + imm
-	 * if (range > 0) then [ptr, ptr + range - off) is safe to access
-	 * if (id > 0) means that some 'var' was added
-	 * if (off > 0) menas that 'imm' was added
-	 */
-	PTR_TO_PACKET,
+	PTR_TO_STACK,		 /* reg == frame_pointer + offset */
+	PTR_TO_PACKET,		 /* reg points to skb->data */
 	PTR_TO_PACKET_END,	 /* skb->data + headlen */
-
-	/* PTR_TO_MAP_VALUE_ADJ is used for doing pointer math inside of a map
-	 * elem value.  We only allow this if we can statically verify that
-	 * access from this register are going to fall within the size of the
-	 * map element.
-	 */
-	PTR_TO_MAP_VALUE_ADJ,
 };
 
 struct bpf_prog;
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 189741c..90affdb 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -9,6 +9,7 @@
 
 #include <linux/bpf.h> /* for enum bpf_reg_type */
 #include <linux/filter.h> /* for MAX_BPF_STACK */
+#include <linux/tnum.h>
 
  /* Just some arbitrary values so we can safely do math without overflowing and
   * are obviously wrong for any sort of memory access.
@@ -19,30 +20,39 @@
 struct bpf_reg_state {
 	enum bpf_reg_type type;
 	union {
-		/* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */
-		s64 imm;
-
-		/* valid when type == PTR_TO_PACKET* */
-		struct {
-			u16 off;
-			u16 range;
-		};
+		/* valid when type == PTR_TO_PACKET */
+		u32 range;
 
 		/* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
 		 *   PTR_TO_MAP_VALUE_OR_NULL
 		 */
 		struct bpf_map *map_ptr;
 	};
+	/* Fixed part of pointer offset, pointer types only */
+	s32 off;
+	/* Used to find other pointers with the same variable offset, so they
+	 * can share range knowledge.
+	 * Exception: for PTR_TO_MAP_VALUE_OR_NULL this is used to share which
+	 * map value we came from, when one is tested for != NULL.  Note that
+	 * this overloading means that we can't do pointer arithmetic on a
+	 * PTR_TO_MAP_VALUE_OR_NULL, we have to NULL-check it _first_.
+	 */
 	u32 id;
+	/* These three fields must be last.  See states_equal() */
+	/* For scalar types (SCALAR_VALUE), this represents our knowledge of
+	 * the actual value.
+	 * For pointer types, this represents the variable part of the offset
+	 * from the pointed-to object, and is shared with all bpf_reg_states
+	 * with the same id as us.
+	 */
+	struct tnum var_off;
 	/* Used to determine if any memory access using this register will
-	 * result in a bad access. These two fields must be last.
-	 * See states_equal()
+	 * result in a bad access.
+	 * These refer to the same value as var_off, not necessarily the actual
+	 * contents of the register.
 	 */
-	s64 min_value;
-	u64 max_value;
-	u32 min_align;
-	u32 aux_off;
-	u32 aux_off_align;
+	s64 min_value; /* minimum possible (s64)value */
+	u64 max_value; /* maximum possible (u64)value */
 };
 
 enum bpf_stack_slot_type {
diff --git a/include/linux/tnum.h b/include/linux/tnum.h
new file mode 100644
index 0000000..a0b07bf
--- /dev/null
+++ b/include/linux/tnum.h
@@ -0,0 +1,79 @@
+/* tnum: tracked (or tristate) numbers
+ *
+ * A tnum tracks knowledge about the bits of a value.  Each bit can be either
+ * known (0 or 1), or unknown (x).  Arithmetic operations on tnums will
+ * propagate the unknown bits such that the tnum result represents all the
+ * possible results for possible values of the operands.
+ */
+#include <linux/types.h>
+
+struct tnum {
+	u64 value;
+	u64 mask;
+};
+
+/* Constructors */
+/* Represent a known constant as a tnum. */
+struct tnum tnum_const(u64 value);
+/* A completely unknown value */
+extern const struct tnum tnum_unknown;
+
+/* Arithmetic and logical ops */
+/* Shift a tnum left (by a fixed shift) */
+struct tnum tnum_lshift(struct tnum a, u8 shift);
+/* Shift a tnum right (by a fixed shift) */
+struct tnum tnum_rshift(struct tnum a, u8 shift);
+/* Add two tnums, return @a + @b */
+struct tnum tnum_add(struct tnum a, struct tnum b);
+/* Subtract two tnums, return @a - @b */
+struct tnum tnum_sub(struct tnum a, struct tnum b);
+/* Bitwise-AND, return @a & @b */
+struct tnum tnum_and(struct tnum a, struct tnum b);
+/* Bitwise-OR, return @a | @b */
+struct tnum tnum_or(struct tnum a, struct tnum b);
+/* Bitwise-XOR, return @a ^ @b */
+struct tnum tnum_xor(struct tnum a, struct tnum b);
+/* Multiply two tnums, return @a * @b */
+struct tnum tnum_mul(struct tnum a, struct tnum b);
+
+/* Return a tnum representing numbers satisfying both @a and @b */
+struct tnum tnum_intersect(struct tnum a, struct tnum b);
+
+/* Return @a with all but the lowest @size bytes cleared */
+struct tnum tnum_cast(struct tnum a, u8 size);
+
+/* Returns true if @a is a known constant */
+static inline bool tnum_is_const(struct tnum a)
+{
+	return !a.mask;
+}
+
+/* Returns true if @a == tnum_const(@b) */
+static inline bool tnum_equals_const(struct tnum a, u64 b)
+{
+	return tnum_is_const(a) && a.value == b;
+}
+
+/* Returns true if @a is completely unknown */
+static inline bool tnum_is_unknown(struct tnum a)
+{
+	return !~a.mask;
+}
+
+/* Returns true if @a is known to be a multiple of @size.
+ * @size must be a power of two.
+ */
+bool tnum_is_aligned(struct tnum a, u64 size);
+
+/* Returns true if @b represents a subset of @a. */
+bool tnum_in(struct tnum a, struct tnum b);
+
+/* Formatting functions.  These have snprintf-like semantics: they will write
+ * up to @size bytes (including the terminating NUL byte), and return the number
+ * of bytes (excluding the terminating NUL) which would have been written had
+ * sufficient space been available.  (Thus tnum_sbin always returns 64.)
+ */
+/* Format a tnum as a pair of hex numbers (value; mask) */
+int tnum_strn(char *str, size_t size, struct tnum a);
+/* Format a tnum as tristate binary expansion */
+int tnum_sbin(char *str, size_t size, struct tnum a);
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index e1e5e65..df14def 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -1,6 +1,6 @@
 obj-y := core.o
 
-obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o
+obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
 obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
 ifeq ($(CONFIG_PERF_EVENTS),y)
 obj-$(CONFIG_BPF_SYSCALL) += stackmap.o
diff --git a/kernel/bpf/tnum.c b/kernel/bpf/tnum.c
new file mode 100644
index 0000000..803bd0d
--- /dev/null
+++ b/kernel/bpf/tnum.c
@@ -0,0 +1,163 @@
+/* tnum: tracked (or tristate) numbers
+ *
+ * A tnum tracks knowledge about the bits of a value.  Each bit can be either
+ * known (0 or 1), or unknown (x).  Arithmetic operations on tnums will
+ * propagate the unknown bits such that the tnum result represents all the
+ * possible results for possible values of the operands.
+ */
+#include <linux/kernel.h>
+#include <linux/tnum.h>
+
+#define TNUM(_v, _m)	(struct tnum){.value = _v, .mask = _m}
+/* A completely unknown value */
+const struct tnum tnum_unknown = { .value = 0, .mask = -1 };
+
+struct tnum tnum_const(u64 value)
+{
+	return TNUM(value, 0);
+}
+
+struct tnum tnum_lshift(struct tnum a, u8 shift)
+{
+	return TNUM(a.value << shift, a.mask << shift);
+}
+
+struct tnum tnum_rshift(struct tnum a, u8 shift)
+{
+	return TNUM(a.value >> shift, a.mask >> shift);
+}
+
+struct tnum tnum_add(struct tnum a, struct tnum b)
+{
+	u64 sm, sv, sigma, chi, mu;
+
+	sm = a.mask + b.mask;
+	sv = a.value + b.value;
+	sigma = sm + sv;
+	chi = sigma ^ sv;
+	mu = chi | a.mask | b.mask;
+	return TNUM(sv & ~mu, mu);
+}
+
+struct tnum tnum_sub(struct tnum a, struct tnum b)
+{
+	u64 dv, alpha, beta, chi, mu;
+
+	dv = a.value - b.value;
+	alpha = dv + a.mask;
+	beta = dv - b.mask;
+	chi = alpha ^ beta;
+	mu = chi | a.mask | b.mask;
+	return TNUM(dv & ~mu, mu);
+}
+
+struct tnum tnum_and(struct tnum a, struct tnum b)
+{
+	u64 alpha, beta, v;
+
+	alpha = a.value | a.mask;
+	beta = b.value | b.mask;
+	v = a.value & b.value;
+	return TNUM(v, alpha & beta & ~v);
+}
+
+struct tnum tnum_or(struct tnum a, struct tnum b)
+{
+	u64 v, mu;
+
+	v = a.value | b.value;
+	mu = a.mask | b.mask;
+	return TNUM(v, mu & ~v);
+}
+
+struct tnum tnum_xor(struct tnum a, struct tnum b)
+{
+	u64 v, mu;
+
+	v = a.value ^ b.value;
+	mu = a.mask | b.mask;
+	return TNUM(v & ~mu, mu);
+}
+
+/* half-multiply add: acc += (unknown * mask * value).
+ * An intermediate step in the multiply algorithm.
+ */
+static struct tnum hma(struct tnum acc, u64 value, u64 mask)
+{
+	while (mask) {
+		if (mask & 1)
+			acc = tnum_add(acc, TNUM(0, value));
+		mask >>= 1;
+		value <<= 1;
+	}
+	return acc;
+}
+
+struct tnum tnum_mul(struct tnum a, struct tnum b)
+{
+	struct tnum acc;
+	u64 pi;
+
+	pi = a.value * b.value;
+	acc = hma(TNUM(pi, 0), a.mask, b.mask | b.value);
+	return hma(acc, b.mask, a.value);
+}
+
+/* Note that if a and b disagree - i.e. one has a 'known 1' where the other has
+ * a 'known 0' - this will return a 'known 1' for that bit.
+ */
+struct tnum tnum_intersect(struct tnum a, struct tnum b)
+{
+	u64 v, mu;
+
+	v = a.value | b.value;
+	mu = a.mask & b.mask;
+	return TNUM(v & ~mu, mu);
+}
+
+struct tnum tnum_cast(struct tnum a, u8 size)
+{
+	a.value &= (1ULL << (size * 8)) - 1;
+	a.mask &= (1ULL << (size * 8)) - 1;
+	return a;
+}
+
+bool tnum_is_aligned(struct tnum a, u64 size)
+{
+	if (!size)
+		return true;
+	return !((a.value | a.mask) & (size - 1));
+}
+
+bool tnum_in(struct tnum a, struct tnum b)
+{
+	if (b.mask & ~a.mask)
+		return false;
+	b.value &= ~a.mask;
+	return a.value == b.value;
+}
+
+int tnum_strn(char *str, size_t size, struct tnum a)
+{
+	return snprintf(str, size, "(%#llx; %#llx)", a.value, a.mask);
+}
+
+int tnum_sbin(char *str, size_t size, struct tnum a)
+{
+	size_t n;
+
+	for (n = 64; n; n--) {
+		if (n < size) {
+			if (a.mask & 1)
+				str[n - 1] = 'x';
+			else if (a.value & 1)
+				str[n - 1] = '1';
+			else
+				str[n - 1] = '0';
+		}
+		a.mask >>= 1;
+		a.value >>= 1;
+	}
+	str[min(size - 1, (size_t)64)] = 0;
+	return 64;
+}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 44b97d9..6f4389c 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -61,12 +61,12 @@
  * (and -20 constant is saved for further stack bounds checking).
  * Meaning that this reg is a pointer to stack plus known immediate constant.
  *
- * Most of the time the registers have UNKNOWN_VALUE type, which
+ * Most of the time the registers have SCALAR_VALUE type, which
  * means the register has some value, but it's not a valid pointer.
- * (like pointer plus pointer becomes UNKNOWN_VALUE type)
+ * (like pointer plus pointer becomes SCALAR_VALUE type)
  *
  * When verifier sees load or store instructions the type of base register
- * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, FRAME_PTR. These are three pointer
+ * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK. These are three pointer
  * types recognized by check_mem_access() function.
  *
  * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
@@ -180,15 +180,12 @@ static __printf(1, 2) void verbose(const char *fmt, ...)
 /* string representation of 'enum bpf_reg_type' */
 static const char * const reg_type_str[] = {
 	[NOT_INIT]		= "?",
-	[UNKNOWN_VALUE]		= "inv",
+	[SCALAR_VALUE]		= "inv",
 	[PTR_TO_CTX]		= "ctx",
 	[CONST_PTR_TO_MAP]	= "map_ptr",
 	[PTR_TO_MAP_VALUE]	= "map_value",
 	[PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
-	[PTR_TO_MAP_VALUE_ADJ]	= "map_value_adj",
-	[FRAME_PTR]		= "fp",
 	[PTR_TO_STACK]		= "fp",
-	[CONST_IMM]		= "imm",
 	[PTR_TO_PACKET]		= "pkt",
 	[PTR_TO_PACKET_END]	= "pkt_end",
 };
@@ -221,32 +218,36 @@ static void print_verifier_state(struct bpf_verifier_state *state)
 		if (t == NOT_INIT)
 			continue;
 		verbose(" R%d=%s", i, reg_type_str[t]);
-		if (t == CONST_IMM || t == PTR_TO_STACK)
-			verbose("%lld", reg->imm);
-		else if (t == PTR_TO_PACKET)
-			verbose("(id=%d,off=%d,r=%d)",
-				reg->id, reg->off, reg->range);
-		else if (t == UNKNOWN_VALUE && reg->imm)
-			verbose("%lld", reg->imm);
-		else if (t == CONST_PTR_TO_MAP || t == PTR_TO_MAP_VALUE ||
-			 t == PTR_TO_MAP_VALUE_OR_NULL ||
-			 t == PTR_TO_MAP_VALUE_ADJ)
-			verbose("(ks=%d,vs=%d,id=%u)",
-				reg->map_ptr->key_size,
-				reg->map_ptr->value_size,
-				reg->id);
-		if (reg->min_value != BPF_REGISTER_MIN_RANGE)
-			verbose(",min_value=%lld",
-				(long long)reg->min_value);
-		if (reg->max_value != BPF_REGISTER_MAX_RANGE)
-			verbose(",max_value=%llu",
-				(unsigned long long)reg->max_value);
-		if (reg->min_align)
-			verbose(",min_align=%u", reg->min_align);
-		if (reg->aux_off)
-			verbose(",aux_off=%u", reg->aux_off);
-		if (reg->aux_off_align)
-			verbose(",aux_off_align=%u", reg->aux_off_align);
+		if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
+		    tnum_is_const(reg->var_off)) {
+			/* reg->off should be 0 for SCALAR_VALUE */
+			verbose("%lld", reg->var_off.value + reg->off);
+		} else {
+			verbose("(id=%d", reg->id);
+			if (t != SCALAR_VALUE)
+				verbose(",off=%d", reg->off);
+			if (t == PTR_TO_PACKET)
+				verbose(",r=%d", reg->range);
+			else if (t == CONST_PTR_TO_MAP ||
+				 t == PTR_TO_MAP_VALUE ||
+				 t == PTR_TO_MAP_VALUE_OR_NULL)
+				verbose(",ks=%d,vs=%d",
+					reg->map_ptr->key_size,
+					reg->map_ptr->value_size);
+			if (reg->min_value != BPF_REGISTER_MIN_RANGE)
+				verbose(",min_value=%lld",
+					(long long)reg->min_value);
+			if (reg->max_value != BPF_REGISTER_MAX_RANGE)
+				verbose(",max_value=%llu",
+					(unsigned long long)reg->max_value);
+			if (!tnum_is_unknown(reg->var_off)) {
+				char tn_buf[48];
+
+				tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+				verbose(",var_off=%s", tn_buf);
+			}
+			verbose(")");
+		}
 	}
 	for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
 		if (state->stack_slot_type[i] == STACK_SPILL)
@@ -463,55 +464,85 @@ static const int caller_saved[CALLER_SAVED_REGS] = {
 	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
 };
 
-static void mark_reg_not_init(struct bpf_reg_state *regs, u32 regno)
-{
-	BUG_ON(regno >= MAX_BPF_REG);
+static void __mark_reg_not_init(struct bpf_reg_state *reg);
 
-	memset(&regs[regno], 0, sizeof(regs[regno]));
-	regs[regno].type = NOT_INIT;
-	regs[regno].min_value = BPF_REGISTER_MIN_RANGE;
-	regs[regno].max_value = BPF_REGISTER_MAX_RANGE;
+/* Mark the 'variable offset' part of a register as zero.  This should be
+ * used only on registers holding a pointer type.
+ */
+static void __mark_reg_known_zero(struct bpf_reg_state *reg)
+{
+	reg->var_off = tnum_const(0);
+	reg->min_value = 0;
+	reg->max_value = 0;
 }
 
-static void init_reg_state(struct bpf_reg_state *regs)
+static void mark_reg_known_zero(struct bpf_reg_state *regs, u32 regno)
 {
-	int i;
-
-	for (i = 0; i < MAX_BPF_REG; i++)
-		mark_reg_not_init(regs, i);
-
-	/* frame pointer */
-	regs[BPF_REG_FP].type = FRAME_PTR;
+	if (WARN_ON(regno >= MAX_BPF_REG)) {
+		verbose("mark_reg_known_zero(regs, %u)\n", regno);
+		/* Something bad happened, let's kill all regs */
+		for (regno = 0; regno < MAX_BPF_REG; regno++)
+			__mark_reg_not_init(regs + regno);
+		return;
+	}
+	__mark_reg_known_zero(regs + regno);
+}
 
-	/* 1st arg to a function */
-	regs[BPF_REG_1].type = PTR_TO_CTX;
+/* Mark a register as having a completely unknown (scalar) value. */
+static void __mark_reg_unknown(struct bpf_reg_state *reg)
+{
+	reg->type = SCALAR_VALUE;
+	reg->id = 0;
+	reg->off = 0;
+	reg->var_off = tnum_unknown;
+	reg->min_value = BPF_REGISTER_MIN_RANGE;
+	reg->max_value = BPF_REGISTER_MAX_RANGE;
 }
 
-static void __mark_reg_unknown_value(struct bpf_reg_state *regs, u32 regno)
+static void mark_reg_unknown(struct bpf_reg_state *regs, u32 regno)
 {
-	regs[regno].type = UNKNOWN_VALUE;
-	regs[regno].id = 0;
-	regs[regno].imm = 0;
+	if (WARN_ON(regno >= MAX_BPF_REG)) {
+		verbose("mark_reg_unknown(regs, %u)\n", regno);
+		/* Something bad happened, let's kill all regs */
+		for (regno = 0; regno < MAX_BPF_REG; regno++)
+			__mark_reg_not_init(regs + regno);
+		return;
+	}
+	__mark_reg_unknown(regs + regno);
 }
 
-static void mark_reg_unknown_value(struct bpf_reg_state *regs, u32 regno)
+static void __mark_reg_not_init(struct bpf_reg_state *reg)
 {
-	BUG_ON(regno >= MAX_BPF_REG);
-	__mark_reg_unknown_value(regs, regno);
+	__mark_reg_unknown(reg);
+	reg->type = NOT_INIT;
 }
 
-static void reset_reg_range_values(struct bpf_reg_state *regs, u32 regno)
+static void mark_reg_not_init(struct bpf_reg_state *regs, u32 regno)
 {
-	regs[regno].min_value = BPF_REGISTER_MIN_RANGE;
-	regs[regno].max_value = BPF_REGISTER_MAX_RANGE;
-	regs[regno].min_align = 0;
+	if (WARN_ON(regno >= MAX_BPF_REG)) {
+		verbose("mark_reg_not_init(regs, %u)\n", regno);
+		/* Something bad happened, let's kill all regs */
+		for (regno = 0; regno < MAX_BPF_REG; regno++)
+			__mark_reg_not_init(regs + regno);
+		return;
+	}
+	__mark_reg_not_init(regs + regno);
 }
 
-static void mark_reg_unknown_value_and_range(struct bpf_reg_state *regs,
-					     u32 regno)
+static void init_reg_state(struct bpf_reg_state *regs)
 {
-	mark_reg_unknown_value(regs, regno);
-	reset_reg_range_values(regs, regno);
+	int i;
+
+	for (i = 0; i < MAX_BPF_REG; i++)
+		mark_reg_not_init(regs, i);
+
+	/* frame pointer */
+	regs[BPF_REG_FP].type = PTR_TO_STACK;
+	mark_reg_known_zero(regs, BPF_REG_FP);
+
+	/* 1st arg to a function */
+	regs[BPF_REG_1].type = PTR_TO_CTX;
+	mark_reg_known_zero(regs, BPF_REG_1);
 }
 
 enum reg_arg_type {
@@ -541,7 +572,7 @@ static int check_reg_arg(struct bpf_reg_state *regs, u32 regno,
 			return -EACCES;
 		}
 		if (t == DST_OP)
-			mark_reg_unknown_value(regs, regno);
+			mark_reg_unknown(regs, regno);
 	}
 	return 0;
 }
@@ -565,12 +596,10 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
 	switch (type) {
 	case PTR_TO_MAP_VALUE:
 	case PTR_TO_MAP_VALUE_OR_NULL:
-	case PTR_TO_MAP_VALUE_ADJ:
 	case PTR_TO_STACK:
 	case PTR_TO_CTX:
 	case PTR_TO_PACKET:
 	case PTR_TO_PACKET_END:
-	case FRAME_PTR:
 	case CONST_PTR_TO_MAP:
 		return true;
 	default:
@@ -650,14 +679,13 @@ static int check_stack_read(struct bpf_verifier_state *state, int off, int size,
 		}
 		if (value_regno >= 0)
 			/* have read misc data from the stack */
-			mark_reg_unknown_value_and_range(state->regs,
-							 value_regno);
+			mark_reg_unknown(state->regs, value_regno);
 		return 0;
 	}
 }
 
 /* check read/write into map element returned by bpf_map_lookup_elem() */
-static int check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
+static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
 			    int size)
 {
 	struct bpf_map *map = env->cur_state.regs[regno].map_ptr;
@@ -670,22 +698,25 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
 	return 0;
 }
 
-/* check read/write into an adjusted map element */
-static int check_map_access_adj(struct bpf_verifier_env *env, u32 regno,
+/* check read/write into a map element with possible variable offset */
+static int check_map_access(struct bpf_verifier_env *env, u32 regno,
 				int off, int size)
 {
 	struct bpf_verifier_state *state = &env->cur_state;
 	struct bpf_reg_state *reg = &state->regs[regno];
 	int err;
 
-	/* We adjusted the register to this map value, so we
-	 * need to change off and size to min_value and max_value
-	 * respectively to make sure our theoretical access will be
-	 * safe.
+	/* We may have adjusted the register to this map value, so we
+	 * need to try adding each of min_value and max_value to off
+	 * to make sure our theoretical access will be safe.
 	 */
 	if (log_level)
 		print_verifier_state(state);
-	env->varlen_map_value_access = true;
+	/* If the offset is variable, we will need to be stricter in state
+	 * pruning from now on.
+	 */
+	if (!tnum_is_const(reg->var_off))
+		env->varlen_map_value_access = true;
 	/* The minimum value is only important with signed
 	 * comparisons where we can't assume the floor of a
 	 * value is 0.  If we are using signed variables for our
@@ -697,10 +728,9 @@ static int check_map_access_adj(struct bpf_verifier_env *env, u32 regno,
 			regno);
 		return -EACCES;
 	}
-	err = check_map_access(env, regno, reg->min_value + off, size);
+	err = __check_map_access(env, regno, reg->min_value + off, size);
 	if (err) {
-		verbose("R%d min value is outside of the array range\n",
-			regno);
+		verbose("R%d min value is outside of the array range\n", regno);
 		return err;
 	}
 
@@ -712,7 +742,10 @@ static int check_map_access_adj(struct bpf_verifier_env *env, u32 regno,
 			regno);
 		return -EACCES;
 	}
-	return check_map_access(env, regno, reg->max_value + off, size);
+	err = __check_map_access(env, regno, reg->max_value + off, size);
+	if (err)
+		verbose("R%d max value is outside of the array range\n", regno);
+	return err;
 }
 
 #define MAX_PACKET_OFF 0xffff
@@ -742,14 +775,14 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
 	}
 }
 
-static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
-			       int size)
+static int __check_packet_access(struct bpf_verifier_env *env, u32 regno,
+				 int off, int size)
 {
 	struct bpf_reg_state *regs = env->cur_state.regs;
 	struct bpf_reg_state *reg = &regs[regno];
 
-	off += reg->off;
-	if (off < 0 || size <= 0 || off + size > reg->range) {
+	if (off < 0 || size <= 0 || off > MAX_PACKET_OFF ||
+	    off + size > reg->range) {
 		verbose("invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
 			off, size, regno, reg->id, reg->off, reg->range);
 		return -EACCES;
@@ -757,7 +790,35 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
 	return 0;
 }
 
-/* check access to 'struct bpf_context' fields */
+static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
+			       int size)
+{
+	struct bpf_reg_state *regs = env->cur_state.regs;
+	struct bpf_reg_state *reg = &regs[regno];
+	int err;
+
+	/* We may have added a variable offset to the packet pointer; but any
+	 * reg->range we have comes after that.  We are only checking the fixed
+	 * offset.
+	 */
+
+	/* We don't allow negative numbers, because we aren't tracking enough
+	 * detail to prove they're safe.
+	 */
+	if (reg->min_value < 0) {
+		verbose("R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
+			regno);
+		return -EACCES;
+	}
+	err = __check_packet_access(env, regno, off, size);
+	if (err) {
+		verbose("R%d offset is outside of the packet\n", regno);
+		return err;
+	}
+	return err;
+}
+
+/* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
 			    enum bpf_access_type t, enum bpf_reg_type *reg_type)
 {
@@ -793,35 +854,19 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
 	if (env->allow_ptr_leaks)
 		return false;
 
-	switch (env->cur_state.regs[regno].type) {
-	case UNKNOWN_VALUE:
-	case CONST_IMM:
-		return false;
-	default:
-		return true;
-	}
+	return env->cur_state.regs[regno].type != SCALAR_VALUE;
 }
 
 static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg,
 				   int off, int size, bool strict)
 {
+	struct tnum reg_off;
 	int ip_align;
-	int reg_off;
 
 	/* Byte size accesses are always allowed. */
 	if (!strict || size == 1)
 		return 0;
 
-	reg_off = reg->off;
-	if (reg->id) {
-		if (reg->aux_off_align % size) {
-			verbose("Packet access is only %u byte aligned, %d byte access not allowed\n",
-				reg->aux_off_align, size);
-			return -EACCES;
-		}
-		reg_off += reg->aux_off;
-	}
-
 	/* For platforms that do not have a Kconfig enabling
 	 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
 	 * NET_IP_ALIGN is universally set to '2'.  And on platforms
@@ -831,20 +876,37 @@ static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg,
 	 * unconditional IP align value of '2'.
 	 */
 	ip_align = 2;
-	if ((ip_align + reg_off + off) % size != 0) {
-		verbose("misaligned packet access off %d+%d+%d size %d\n",
-			ip_align, reg_off, off, size);
+
+	reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
+	if (!tnum_is_aligned(reg_off, size)) {
+		char tn_buf[48];
+
+		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+		verbose("misaligned packet access off %d+%s+%d+%d size %d\n",
+			ip_align, tn_buf, reg->off, off, size);
 		return -EACCES;
 	}
 
 	return 0;
 }
 
-static int check_val_ptr_alignment(const struct bpf_reg_state *reg,
-				   int size, bool strict)
+static int check_generic_ptr_alignment(const struct bpf_reg_state *reg,
+				       const char *pointer_desc,
+				       int off, int size, bool strict)
 {
-	if (strict && size != 1) {
-		verbose("Unknown alignment. Only byte-sized access allowed in value access.\n");
+	struct tnum reg_off;
+
+	/* Byte size accesses are always allowed. */
+	if (!strict || size == 1)
+		return 0;
+
+	reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
+	if (!tnum_is_aligned(reg_off, size)) {
+		char tn_buf[48];
+
+		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+		verbose("misaligned %saccess off %s+%d+%d size %d\n",
+			pointer_desc, tn_buf, reg->off, off, size);
 		return -EACCES;
 	}
 
@@ -856,21 +918,25 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
 			       int off, int size)
 {
 	bool strict = env->strict_alignment;
+	const char *pointer_desc = "";
 
 	switch (reg->type) {
 	case PTR_TO_PACKET:
+		/* special case, because of NET_IP_ALIGN */
 		return check_pkt_ptr_alignment(reg, off, size, strict);
-	case PTR_TO_MAP_VALUE_ADJ:
-		return check_val_ptr_alignment(reg, size, strict);
+	case PTR_TO_MAP_VALUE:
+		pointer_desc = "value ";
+		break;
+	case PTR_TO_CTX:
+		pointer_desc = "context ";
+		break;
+	case PTR_TO_STACK:
+		pointer_desc = "stack ";
+		break;
 	default:
-		if (off % size != 0) {
-			verbose("misaligned access off %d size %d\n",
-				off, size);
-			return -EACCES;
-		}
-
-		return 0;
+		break;
 	}
+	return check_generic_ptr_alignment(reg, pointer_desc, off, size, strict);
 }
 
 /* check whether memory at (regno + off) is accessible for t = (read | write)
@@ -887,52 +953,79 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 	struct bpf_reg_state *reg = &state->regs[regno];
 	int size, err = 0;
 
-	if (reg->type == PTR_TO_STACK)
-		off += reg->imm;
-
 	size = bpf_size_to_bytes(bpf_size);
 	if (size < 0)
 		return size;
 
+	/* alignment checks will add in reg->off themselves */
 	err = check_ptr_alignment(env, reg, off, size);
 	if (err)
 		return err;
 
-	if (reg->type == PTR_TO_MAP_VALUE ||
-	    reg->type == PTR_TO_MAP_VALUE_ADJ) {
+	/* for access checks, reg->off is just part of off */
+	off += reg->off;
+
+	if (reg->type == PTR_TO_MAP_VALUE) {
 		if (t == BPF_WRITE && value_regno >= 0 &&
 		    is_pointer_value(env, value_regno)) {
 			verbose("R%d leaks addr into map\n", value_regno);
 			return -EACCES;
 		}
 
-		if (reg->type == PTR_TO_MAP_VALUE_ADJ)
-			err = check_map_access_adj(env, regno, off, size);
-		else
-			err = check_map_access(env, regno, off, size);
+		err = check_map_access(env, regno, off, size);
 		if (!err && t == BPF_READ && value_regno >= 0)
-			mark_reg_unknown_value_and_range(state->regs,
-							 value_regno);
+			mark_reg_unknown(state->regs, value_regno);
 
 	} else if (reg->type == PTR_TO_CTX) {
-		enum bpf_reg_type reg_type = UNKNOWN_VALUE;
+		enum bpf_reg_type reg_type = SCALAR_VALUE;
 
 		if (t == BPF_WRITE && value_regno >= 0 &&
 		    is_pointer_value(env, value_regno)) {
 			verbose("R%d leaks addr into ctx\n", value_regno);
 			return -EACCES;
 		}
+		/* ctx accesses must be at a fixed offset, so that we can
+		 * determine what type of data were returned.
+		 */
+		if (!tnum_is_const(reg->var_off)) {
+			char tn_buf[48];
+
+			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+			verbose("variable ctx access var_off=%s off=%d size=%d",
+				tn_buf, off, size);
+			return -EACCES;
+		}
+		off += reg->var_off.value;
 		err = check_ctx_access(env, insn_idx, off, size, t, &reg_type);
 		if (!err && t == BPF_READ && value_regno >= 0) {
-			mark_reg_unknown_value_and_range(state->regs,
-							 value_regno);
-			/* note that reg.[id|off|range] == 0 */
+			/* ctx access returns either a scalar, or a
+			 * PTR_TO_PACKET[_END].  In the latter case, we know
+			 * the offset is zero.
+			 */
+			if (reg_type == SCALAR_VALUE)
+				mark_reg_unknown(state->regs, value_regno);
+			else
+				mark_reg_known_zero(state->regs, value_regno);
+			state->regs[value_regno].id = 0;
+			state->regs[value_regno].off = 0;
+			state->regs[value_regno].range = 0;
 			state->regs[value_regno].type = reg_type;
-			state->regs[value_regno].aux_off = 0;
-			state->regs[value_regno].aux_off_align = 0;
 		}
 
-	} else if (reg->type == FRAME_PTR || reg->type == PTR_TO_STACK) {
+	} else if (reg->type == PTR_TO_STACK) {
+		/* stack accesses must be at a fixed offset, so that we can
+		 * determine what type of data were returned.
+		 * See check_stack_read().
+		 */
+		if (!tnum_is_const(reg->var_off)) {
+			char tn_buf[48];
+
+			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+			verbose("variable stack access var_off=%s off=%d size=%d",
+				tn_buf, off, size);
+			return -EACCES;
+		}
+		off += reg->var_off.value;
 		if (off >= 0 || off < -MAX_BPF_STACK) {
 			verbose("invalid stack off=%d size=%d\n", off, size);
 			return -EACCES;
@@ -952,7 +1045,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 		} else {
 			err = check_stack_read(state, off, size, value_regno);
 		}
-	} else if (state->regs[regno].type == PTR_TO_PACKET) {
+	} else if (reg->type == PTR_TO_PACKET) {
 		if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
 			verbose("cannot write into packet\n");
 			return -EACCES;
@@ -964,21 +1057,24 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 		}
 		err = check_packet_access(env, regno, off, size);
 		if (!err && t == BPF_READ && value_regno >= 0)
-			mark_reg_unknown_value_and_range(state->regs,
-							 value_regno);
+			mark_reg_unknown(state->regs, value_regno);
 	} else {
 		verbose("R%d invalid mem access '%s'\n",
 			regno, reg_type_str[reg->type]);
 		return -EACCES;
 	}
 
-	if (!err && size <= 2 && value_regno >= 0 && env->allow_ptr_leaks &&
-	    state->regs[value_regno].type == UNKNOWN_VALUE) {
-		/* 1 or 2 byte load zero-extends, determine the number of
-		 * zero upper bits. Not doing it fo 4 byte load, since
-		 * such values cannot be added to ptr_to_packet anyway.
-		 */
-		state->regs[value_regno].imm = 64 - size * 8;
+	if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
+	    state->regs[value_regno].type == SCALAR_VALUE) {
+		/* b/h/w load zero-extends, mark upper bits as known 0 */
+		state->regs[value_regno].var_off = tnum_cast(
+					state->regs[value_regno].var_off, size);
+		/* sign bit is known zero, so we can bound the value */
+		state->regs[value_regno].min_value = 0;
+		state->regs[value_regno].max_value = min_t(u64,
+					state->regs[value_regno].var_off.value |
+					state->regs[value_regno].var_off.mask,
+					BPF_REGISTER_MAX_RANGE);
 	}
 	return err;
 }
@@ -1015,9 +1111,17 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins
 				BPF_SIZE(insn->code), BPF_WRITE, -1);
 }
 
+/* Does this register contain a constant zero? */
+static bool register_is_null(struct bpf_reg_state reg)
+{
+	return reg.type == SCALAR_VALUE && tnum_equals_const(reg.var_off, 0);
+}
+
 /* when register 'regno' is passed into function that will read 'access_size'
  * bytes from that pointer, make sure that it's within stack boundary
- * and all elements of stack are initialized
+ * and all elements of stack are initialized.
+ * Unlike most pointer bounds-checking functions, this one doesn't take an
+ * 'off' argument, so it has to add in reg->off itself.
  */
 static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
 				int access_size, bool zero_size_allowed,
@@ -1028,9 +1132,9 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
 	int off, i;
 
 	if (regs[regno].type != PTR_TO_STACK) {
+		/* Allow zero-byte read from NULL, regardless of pointer type */
 		if (zero_size_allowed && access_size == 0 &&
-		    regs[regno].type == CONST_IMM &&
-		    regs[regno].imm  == 0)
+		    register_is_null(regs[regno]))
 			return 0;
 
 		verbose("R%d type=%s expected=%s\n", regno,
@@ -1039,7 +1143,15 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
 		return -EACCES;
 	}
 
-	off = regs[regno].imm;
+	/* Only allow fixed-offset stack reads */
+	if (!tnum_is_const(regs[regno].var_off)) {
+		char tn_buf[48];
+
+		tnum_strn(tn_buf, sizeof(tn_buf), regs[regno].var_off);
+		verbose("invalid variable stack read R%d var_off=%s\n",
+			regno, tn_buf);
+	}
+	off = regs[regno].off + regs[regno].var_off.value;
 	if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
 	    access_size <= 0) {
 		verbose("invalid stack type R%d off=%d access_size=%d\n",
@@ -1070,16 +1182,14 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
 				   int access_size, bool zero_size_allowed,
 				   struct bpf_call_arg_meta *meta)
 {
-	struct bpf_reg_state *regs = env->cur_state.regs;
+	struct bpf_reg_state *regs = env->cur_state.regs, *reg = &regs[regno];
 
-	switch (regs[regno].type) {
+	switch (reg->type) {
 	case PTR_TO_PACKET:
-		return check_packet_access(env, regno, 0, access_size);
+		return check_packet_access(env, regno, reg->off, access_size);
 	case PTR_TO_MAP_VALUE:
-		return check_map_access(env, regno, 0, access_size);
-	case PTR_TO_MAP_VALUE_ADJ:
-		return check_map_access_adj(env, regno, 0, access_size);
-	default: /* const_imm|ptr_to_stack or invalid ptr */
+		return check_map_access(env, regno, reg->off, access_size);
+	default: /* scalar_value|ptr_to_stack or invalid ptr */
 		return check_stack_boundary(env, regno, access_size,
 					    zero_size_allowed, meta);
 	}
@@ -1122,11 +1232,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 			goto err_type;
 	} else if (arg_type == ARG_CONST_SIZE ||
 		   arg_type == ARG_CONST_SIZE_OR_ZERO) {
-		expected_type = CONST_IMM;
-		/* One exception. Allow UNKNOWN_VALUE registers when the
-		 * boundaries are known and don't cause unsafe memory accesses
-		 */
-		if (type != UNKNOWN_VALUE && type != expected_type)
+		expected_type = SCALAR_VALUE;
+		if (type != expected_type)
 			goto err_type;
 	} else if (arg_type == ARG_CONST_MAP_PTR) {
 		expected_type = CONST_PTR_TO_MAP;
@@ -1140,13 +1247,13 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 		   arg_type == ARG_PTR_TO_UNINIT_MEM) {
 		expected_type = PTR_TO_STACK;
 		/* One exception here. In case function allows for NULL to be
-		 * passed in as argument, it's a CONST_IMM type. Final test
+		 * passed in as argument, it's a SCALAR_VALUE type. Final test
 		 * happens during stack boundary checking.
 		 */
-		if (type == CONST_IMM && reg->imm == 0)
+		if (register_is_null(*reg))
 			/* final test in check_stack_boundary() */;
 		else if (type != PTR_TO_PACKET && type != PTR_TO_MAP_VALUE &&
-			 type != PTR_TO_MAP_VALUE_ADJ && type != expected_type)
+			 type != expected_type)
 			goto err_type;
 		meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM;
 	} else {
@@ -1172,7 +1279,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 			return -EACCES;
 		}
 		if (type == PTR_TO_PACKET)
-			err = check_packet_access(env, regno, 0,
+			err = check_packet_access(env, regno, reg->off,
 						  meta->map_ptr->key_size);
 		else
 			err = check_stack_boundary(env, regno,
@@ -1188,7 +1295,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 			return -EACCES;
 		}
 		if (type == PTR_TO_PACKET)
-			err = check_packet_access(env, regno, 0,
+			err = check_packet_access(env, regno, reg->off,
 						  meta->map_ptr->value_size);
 		else
 			err = check_stack_boundary(env, regno,
@@ -1208,10 +1315,11 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 			return -EACCES;
 		}
 
-		/* If the register is UNKNOWN_VALUE, the access check happens
-		 * using its boundaries. Otherwise, just use its imm
+		/* The register is SCALAR_VALUE; the access check
+		 * happens using its boundaries.
 		 */
-		if (type == UNKNOWN_VALUE) {
+
+		if (!tnum_is_const(reg->var_off))
 			/* For unprivileged variable accesses, disable raw
 			 * mode so that the program is required to
 			 * initialize all the memory that the helper could
@@ -1219,35 +1327,28 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 			 */
 			meta = NULL;
 
-			if (reg->min_value < 0) {
-				verbose("R%d min value is negative, either use unsigned or 'var &= const'\n",
-					regno);
-				return -EACCES;
-			}
-
-			if (reg->min_value == 0) {
-				err = check_helper_mem_access(env, regno - 1, 0,
-							      zero_size_allowed,
-							      meta);
-				if (err)
-					return err;
-			}
+		if (reg->min_value < 0) {
+			verbose("R%d min value is negative, either use unsigned or 'var &= const'\n",
+				regno);
+			return -EACCES;
+		}
 
-			if (reg->max_value == BPF_REGISTER_MAX_RANGE) {
-				verbose("R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
-					regno);
-				return -EACCES;
-			}
-			err = check_helper_mem_access(env, regno - 1,
-						      reg->max_value,
-						      zero_size_allowed, meta);
+		if (reg->min_value == 0) {
+			err = check_helper_mem_access(env, regno - 1, 0,
+						      zero_size_allowed,
+						      meta);
 			if (err)
 				return err;
-		} else {
-			/* register is CONST_IMM */
-			err = check_helper_mem_access(env, regno - 1, reg->imm,
-						      zero_size_allowed, meta);
 		}
+
+		if (reg->max_value == BPF_REGISTER_MAX_RANGE) {
+			verbose("R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
+				regno);
+			return -EACCES;
+		}
+		err = check_helper_mem_access(env, regno - 1,
+					      reg->max_value,
+					      zero_size_allowed, meta);
 	}
 
 	return err;
@@ -1339,6 +1440,9 @@ static int check_raw_mode(const struct bpf_func_proto *fn)
 	return count > 1 ? -EINVAL : 0;
 }
 
+/* Packet data might have moved, any old PTR_TO_PACKET[_END] are now invalid,
+ * so turn them into unknown SCALAR_VALUE.
+ */
 static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
 {
 	struct bpf_verifier_state *state = &env->cur_state;
@@ -1348,7 +1452,7 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
 	for (i = 0; i < MAX_BPF_REG; i++)
 		if (regs[i].type == PTR_TO_PACKET ||
 		    regs[i].type == PTR_TO_PACKET_END)
-			mark_reg_unknown_value(regs, i);
+			mark_reg_unknown(regs, i);
 
 	for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
 		if (state->stack_slot_type[i] != STACK_SPILL)
@@ -1357,8 +1461,7 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
 		if (reg->type != PTR_TO_PACKET &&
 		    reg->type != PTR_TO_PACKET_END)
 			continue;
-		__mark_reg_unknown_value(state->spilled_regs,
-					 i / BPF_REG_SIZE);
+		__mark_reg_unknown(reg);
 	}
 }
 
@@ -1438,14 +1541,17 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
 
 	/* update return register */
 	if (fn->ret_type == RET_INTEGER) {
-		regs[BPF_REG_0].type = UNKNOWN_VALUE;
+		/* sets type to SCALAR_VALUE */
+		mark_reg_unknown(regs, BPF_REG_0);
 	} else if (fn->ret_type == RET_VOID) {
 		regs[BPF_REG_0].type = NOT_INIT;
 	} else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL) {
 		struct bpf_insn_aux_data *insn_aux;
 
 		regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
-		regs[BPF_REG_0].max_value = regs[BPF_REG_0].min_value = 0;
+		/* There is no offset yet applied, variable or fixed */
+		mark_reg_known_zero(regs, BPF_REG_0);
+		regs[BPF_REG_0].off = 0;
 		/* remember map_ptr, so that check_map_access()
 		 * can check 'value_size' boundary of memory access
 		 * to map element returned from bpf_map_lookup_elem()
@@ -1476,371 +1582,336 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
 	return 0;
 }
 
-static int check_packet_ptr_add(struct bpf_verifier_env *env,
-				struct bpf_insn *insn)
+static void check_reg_overflow(struct bpf_reg_state *reg)
 {
-	struct bpf_reg_state *regs = env->cur_state.regs;
-	struct bpf_reg_state *dst_reg = &regs[insn->dst_reg];
-	struct bpf_reg_state *src_reg = &regs[insn->src_reg];
-	struct bpf_reg_state tmp_reg;
-	s32 imm;
-
-	if (BPF_SRC(insn->code) == BPF_K) {
-		/* pkt_ptr += imm */
-		imm = insn->imm;
-
-add_imm:
-		if (imm < 0) {
-			verbose("addition of negative constant to packet pointer is not allowed\n");
-			return -EACCES;
-		}
-		if (imm >= MAX_PACKET_OFF ||
-		    imm + dst_reg->off >= MAX_PACKET_OFF) {
-			verbose("constant %d is too large to add to packet pointer\n",
-				imm);
-			return -EACCES;
-		}
-		/* a constant was added to pkt_ptr.
-		 * Remember it while keeping the same 'id'
-		 */
-		dst_reg->off += imm;
-	} else {
-		bool had_id;
-
-		if (src_reg->type == PTR_TO_PACKET) {
-			/* R6=pkt(id=0,off=0,r=62) R7=imm22; r7 += r6 */
-			tmp_reg = *dst_reg;  /* save r7 state */
-			*dst_reg = *src_reg; /* copy pkt_ptr state r6 into r7 */
-			src_reg = &tmp_reg;  /* pretend it's src_reg state */
-			/* if the checks below reject it, the copy won't matter,
-			 * since we're rejecting the whole program. If all ok,
-			 * then imm22 state will be added to r7
-			 * and r7 will be pkt(id=0,off=22,r=62) while
-			 * r6 will stay as pkt(id=0,off=0,r=62)
-			 */
-		}
-
-		if (src_reg->type == CONST_IMM) {
-			/* pkt_ptr += reg where reg is known constant */
-			imm = src_reg->imm;
-			goto add_imm;
-		}
-		/* disallow pkt_ptr += reg
-		 * if reg is not uknown_value with guaranteed zero upper bits
-		 * otherwise pkt_ptr may overflow and addition will become
-		 * subtraction which is not allowed
-		 */
-		if (src_reg->type != UNKNOWN_VALUE) {
-			verbose("cannot add '%s' to ptr_to_packet\n",
-				reg_type_str[src_reg->type]);
-			return -EACCES;
-		}
-		if (src_reg->imm < 48) {
-			verbose("cannot add integer value with %lld upper zero bits to ptr_to_packet\n",
-				src_reg->imm);
-			return -EACCES;
-		}
-
-		had_id = (dst_reg->id != 0);
-
-		/* dst_reg stays as pkt_ptr type and since some positive
-		 * integer value was added to the pointer, increment its 'id'
-		 */
-		dst_reg->id = ++env->id_gen;
+	if (reg->max_value > BPF_REGISTER_MAX_RANGE)
+		reg->max_value = BPF_REGISTER_MAX_RANGE;
+	if (reg->min_value < BPF_REGISTER_MIN_RANGE ||
+	    reg->min_value > BPF_REGISTER_MAX_RANGE)
+		reg->min_value = BPF_REGISTER_MIN_RANGE;
+}
 
-		/* something was added to pkt_ptr, set range to zero */
-		dst_reg->aux_off += dst_reg->off;
-		dst_reg->off = 0;
-		dst_reg->range = 0;
-		if (had_id)
-			dst_reg->aux_off_align = min(dst_reg->aux_off_align,
-						     src_reg->min_align);
-		else
-			dst_reg->aux_off_align = src_reg->min_align;
+static void coerce_reg_to_32(struct bpf_reg_state *reg)
+{
+	/* 32-bit values can't be negative as an s64 */
+	if (reg->min_value < 0)
+		reg->min_value = 0;
+	/* clear high 32 bits */
+	reg->var_off = tnum_cast(reg->var_off, 4);
+	/* Did value become known?  Then update bounds */
+	if (tnum_is_const(reg->var_off)) {
+		if ((s64)reg->var_off.value > BPF_REGISTER_MIN_RANGE)
+			reg->min_value = reg->var_off.value;
+		if (reg->var_off.value < BPF_REGISTER_MAX_RANGE)
+			reg->max_value = reg->var_off.value;
 	}
-	return 0;
 }
 
-static int evaluate_reg_alu(struct bpf_verifier_env *env, struct bpf_insn *insn)
+/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
+ * Caller must check_reg_overflow all argument regs beforehand.
+ * Caller should also handle BPF_MOV case separately.
+ * If we return -EACCES, caller may want to try again treating pointer as a
+ * scalar.  So we only emit a diagnostic if !env->allow_ptr_leaks.
+ */
+static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
+				   struct bpf_insn *insn,
+				   struct bpf_reg_state *ptr_reg,
+				   struct bpf_reg_state *off_reg)
 {
-	struct bpf_reg_state *regs = env->cur_state.regs;
-	struct bpf_reg_state *dst_reg = &regs[insn->dst_reg];
+	struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg;
+	bool known = tnum_is_const(off_reg->var_off);
+	s64 min_val = off_reg->min_value;
+	u64 max_val = off_reg->max_value;
 	u8 opcode = BPF_OP(insn->code);
-	s64 imm_log2;
+	u32 dst = insn->dst_reg;
 
-	/* for type == UNKNOWN_VALUE:
-	 * imm > 0 -> number of zero upper bits
-	 * imm == 0 -> don't track which is the same as all bits can be non-zero
-	 */
+	dst_reg = &regs[dst];
 
-	if (BPF_SRC(insn->code) == BPF_X) {
-		struct bpf_reg_state *src_reg = &regs[insn->src_reg];
-
-		if (src_reg->type == UNKNOWN_VALUE && src_reg->imm > 0 &&
-		    dst_reg->imm && opcode == BPF_ADD) {
-			/* dreg += sreg
-			 * where both have zero upper bits. Adding them
-			 * can only result making one more bit non-zero
-			 * in the larger value.
-			 * Ex. 0xffff (imm=48) + 1 (imm=63) = 0x10000 (imm=47)
-			 *     0xffff (imm=48) + 0xffff = 0x1fffe (imm=47)
-			 */
-			dst_reg->imm = min(dst_reg->imm, src_reg->imm);
-			dst_reg->imm--;
-			return 0;
-		}
-		if (src_reg->type == CONST_IMM && src_reg->imm > 0 &&
-		    dst_reg->imm && opcode == BPF_ADD) {
-			/* dreg += sreg
-			 * where dreg has zero upper bits and sreg is const.
-			 * Adding them can only result making one more bit
-			 * non-zero in the larger value.
-			 */
-			imm_log2 = __ilog2_u64((long long)src_reg->imm);
-			dst_reg->imm = min(dst_reg->imm, 63 - imm_log2);
-			dst_reg->imm--;
-			return 0;
-		}
-		/* all other cases non supported yet, just mark dst_reg */
-		dst_reg->imm = 0;
-		return 0;
+	if (WARN_ON_ONCE(known && (min_val != max_val))) {
+		print_verifier_state(&env->cur_state);
+		verbose("verifier internal error\n");
+		return -EINVAL;
+	}
+
+	if (BPF_CLASS(insn->code) != BPF_ALU64) {
+		/* 32-bit ALU ops on pointers produce (meaningless) scalars */
+		if (!env->allow_ptr_leaks)
+			verbose("R%d 32-bit pointer arithmetic prohibited\n",
+				dst);
+		return -EACCES;
+	}
+
+	if (ptr_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
+		if (!env->allow_ptr_leaks)
+			verbose("R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n",
+				dst);
+		return -EACCES;
+	}
+	if (ptr_reg->type == CONST_PTR_TO_MAP) {
+		if (!env->allow_ptr_leaks)
+			verbose("R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n",
+				dst);
+		return -EACCES;
+	}
+	if (ptr_reg->type == PTR_TO_PACKET_END) {
+		if (!env->allow_ptr_leaks)
+			verbose("R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n",
+				dst);
+		return -EACCES;
 	}
 
-	/* sign extend 32-bit imm into 64-bit to make sure that
-	 * negative values occupy bit 63. Note ilog2() would have
-	 * been incorrect, since sizeof(insn->imm) == 4
+	/* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
+	 * The id may be overwritten later if we create a new variable offset.
 	 */
-	imm_log2 = __ilog2_u64((long long)insn->imm);
+	dst_reg->type = ptr_reg->type;
+	dst_reg->id = ptr_reg->id;
 
-	if (dst_reg->imm && opcode == BPF_LSH) {
-		/* reg <<= imm
-		 * if reg was a result of 2 byte load, then its imm == 48
-		 * which means that upper 48 bits are zero and shifting this reg
-		 * left by 4 would mean that upper 44 bits are still zero
+	switch (opcode) {
+	case BPF_ADD:
+		/* We can take a fixed offset as long as it doesn't overflow
+		 * the s32 'off' field
 		 */
-		dst_reg->imm -= insn->imm;
-	} else if (dst_reg->imm && opcode == BPF_MUL) {
-		/* reg *= imm
-		 * if multiplying by 14 subtract 4
-		 * This is conservative calculation of upper zero bits.
-		 * It's not trying to special case insn->imm == 1 or 0 cases
+		if (known && (ptr_reg->off + min_val ==
+			      (s64)(s32)(ptr_reg->off + min_val))) {
+			/* pointer += K.  Accumulate it into fixed offset */
+			dst_reg->min_value = ptr_reg->min_value;
+			dst_reg->max_value = ptr_reg->max_value;
+			dst_reg->var_off = ptr_reg->var_off;
+			dst_reg->off = ptr_reg->off + min_val;
+			break;
+		}
+		if (max_val == BPF_REGISTER_MAX_RANGE) {
+			if (!env->allow_ptr_leaks)
+				verbose("R%d tried to add unbounded value to pointer\n",
+					dst);
+			return -EACCES;
+		}
+		/* A new variable offset is created.  Note that off_reg->off
+		 * == 0, since it's a scalar.
+		 * dst_reg gets the pointer type and since some positive
+		 * integer value was added to the pointer, increment its 'id'.
+		 * this creates a new 'base' pointer, off_reg (variable) gets
+		 * added into the variable offset, and we copy the fixed offset
+		 * from ptr_reg.
 		 */
-		dst_reg->imm -= imm_log2 + 1;
-	} else if (opcode == BPF_AND) {
-		/* reg &= imm */
-		dst_reg->imm = 63 - imm_log2;
-	} else if (dst_reg->imm && opcode == BPF_ADD) {
-		/* reg += imm */
-		dst_reg->imm = min(dst_reg->imm, 63 - imm_log2);
-		dst_reg->imm--;
-	} else if (opcode == BPF_RSH) {
-		/* reg >>= imm
-		 * which means that after right shift, upper bits will be zero
-		 * note that verifier already checked that
-		 * 0 <= imm < 64 for shift insn
+		if (min_val <= BPF_REGISTER_MIN_RANGE)
+			dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
+		if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
+			dst_reg->min_value += min_val;
+		if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
+			dst_reg->max_value += max_val;
+		dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
+		dst_reg->off = ptr_reg->off;
+		dst_reg->id = ++env->id_gen;
+		if (ptr_reg->type == PTR_TO_PACKET)
+			/* something was added to pkt_ptr, set range to zero */
+			dst_reg->range = 0;
+		break;
+	case BPF_SUB:
+		if (dst_reg == off_reg) {
+			/* scalar -= pointer.  Creates an unknown scalar */
+			if (!env->allow_ptr_leaks)
+				verbose("R%d tried to subtract pointer from scalar\n",
+					dst);
+			return -EACCES;
+		}
+		/* We don't allow subtraction from FP, because (according to
+		 * test_verifier.c test "invalid fp arithmetic", JITs might not
+		 * be able to deal with it.
 		 */
-		dst_reg->imm += insn->imm;
-		if (unlikely(dst_reg->imm > 64))
-			/* some dumb code did:
-			 * r2 = *(u32 *)mem;
-			 * r2 >>= 32;
-			 * and all bits are zero now */
-			dst_reg->imm = 64;
-	} else {
-		/* all other alu ops, means that we don't know what will
-		 * happen to the value, mark it with unknown number of zero bits
+		if (ptr_reg->type == PTR_TO_STACK) {
+			if (!env->allow_ptr_leaks)
+				verbose("R%d subtraction from stack pointer prohibited\n",
+					dst);
+			return -EACCES;
+		}
+		if (known && (ptr_reg->off - min_val ==
+			      (s64)(s32)(ptr_reg->off - min_val))) {
+			/* pointer -= K.  Subtract it from fixed offset */
+			dst_reg->min_value = ptr_reg->min_value;
+			dst_reg->max_value = ptr_reg->max_value;
+			dst_reg->var_off = ptr_reg->var_off;
+			dst_reg->id = ptr_reg->id;
+			dst_reg->off = ptr_reg->off - min_val;
+			break;
+		}
+		/* Subtracting a negative value will just confuse everything.
+		 * This can happen if off_reg is an immediate.
 		 */
-		dst_reg->imm = 0;
-	}
-
-	if (dst_reg->imm < 0) {
-		/* all 64 bits of the register can contain non-zero bits
-		 * and such value cannot be added to ptr_to_packet, since it
-		 * may overflow, mark it as unknown to avoid further eval
+		if ((s64)max_val < 0) {
+			if (!env->allow_ptr_leaks)
+				verbose("R%d tried to subtract negative max_val %lld from pointer\n",
+					dst, (s64)max_val);
+			return -EACCES;
+		}
+		/* A new variable offset is created.  If the subtrahend is known
+		 * nonnegative, then any reg->range we had before is still good.
 		 */
-		dst_reg->imm = 0;
-	}
-	return 0;
-}
-
-static int evaluate_reg_imm_alu(struct bpf_verifier_env *env,
-				struct bpf_insn *insn)
-{
-	struct bpf_reg_state *regs = env->cur_state.regs;
-	struct bpf_reg_state *dst_reg = &regs[insn->dst_reg];
-	struct bpf_reg_state *src_reg = &regs[insn->src_reg];
-	u8 opcode = BPF_OP(insn->code);
-	u64 dst_imm = dst_reg->imm;
-
-	/* dst_reg->type == CONST_IMM here. Simulate execution of insns
-	 * containing ALU ops. Don't care about overflow or negative
-	 * values, just add/sub/... them; registers are in u64.
-	 */
-	if (opcode == BPF_ADD && BPF_SRC(insn->code) == BPF_K) {
-		dst_imm += insn->imm;
-	} else if (opcode == BPF_ADD && BPF_SRC(insn->code) == BPF_X &&
-		   src_reg->type == CONST_IMM) {
-		dst_imm += src_reg->imm;
-	} else if (opcode == BPF_SUB && BPF_SRC(insn->code) == BPF_K) {
-		dst_imm -= insn->imm;
-	} else if (opcode == BPF_SUB && BPF_SRC(insn->code) == BPF_X &&
-		   src_reg->type == CONST_IMM) {
-		dst_imm -= src_reg->imm;
-	} else if (opcode == BPF_MUL && BPF_SRC(insn->code) == BPF_K) {
-		dst_imm *= insn->imm;
-	} else if (opcode == BPF_MUL && BPF_SRC(insn->code) == BPF_X &&
-		   src_reg->type == CONST_IMM) {
-		dst_imm *= src_reg->imm;
-	} else if (opcode == BPF_OR && BPF_SRC(insn->code) == BPF_K) {
-		dst_imm |= insn->imm;
-	} else if (opcode == BPF_OR && BPF_SRC(insn->code) == BPF_X &&
-		   src_reg->type == CONST_IMM) {
-		dst_imm |= src_reg->imm;
-	} else if (opcode == BPF_AND && BPF_SRC(insn->code) == BPF_K) {
-		dst_imm &= insn->imm;
-	} else if (opcode == BPF_AND && BPF_SRC(insn->code) == BPF_X &&
-		   src_reg->type == CONST_IMM) {
-		dst_imm &= src_reg->imm;
-	} else if (opcode == BPF_RSH && BPF_SRC(insn->code) == BPF_K) {
-		dst_imm >>= insn->imm;
-	} else if (opcode == BPF_RSH && BPF_SRC(insn->code) == BPF_X &&
-		   src_reg->type == CONST_IMM) {
-		dst_imm >>= src_reg->imm;
-	} else if (opcode == BPF_LSH && BPF_SRC(insn->code) == BPF_K) {
-		dst_imm <<= insn->imm;
-	} else if (opcode == BPF_LSH && BPF_SRC(insn->code) == BPF_X &&
-		   src_reg->type == CONST_IMM) {
-		dst_imm <<= src_reg->imm;
-	} else {
-		mark_reg_unknown_value(regs, insn->dst_reg);
-		goto out;
+		if (max_val >= BPF_REGISTER_MAX_RANGE)
+			dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
+		if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
+			dst_reg->min_value -= max_val;
+		if (min_val <= BPF_REGISTER_MIN_RANGE)
+			dst_reg->max_value = BPF_REGISTER_MAX_RANGE;
+		if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
+			dst_reg->max_value -= min_val;
+		dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
+		dst_reg->off = ptr_reg->off;
+		dst_reg->id = ++env->id_gen;
+		if (ptr_reg->type == PTR_TO_PACKET && min_val < 0)
+			/* something was added to pkt_ptr, set range to zero */
+			dst_reg->range = 0;
+		break;
+	case BPF_AND:
+	case BPF_OR:
+	case BPF_XOR:
+		/* bitwise ops on pointers are troublesome, prohibit for now.
+		 * (However, in principle we could allow some cases, e.g.
+		 * ptr &= ~3 which would reduce min_value by 3.)
+		 */
+		if (!env->allow_ptr_leaks)
+			verbose("R%d bitwise operator %s on pointer prohibited\n",
+				dst, bpf_alu_string[opcode >> 4]);
+		return -EACCES;
+	default:
+		/* other operators (e.g. MUL,LSH) produce non-pointer results */
+		if (!env->allow_ptr_leaks)
+			verbose("R%d pointer arithmetic with %s operator prohibited\n",
+				dst, bpf_alu_string[opcode >> 4]);
+		return -EACCES;
 	}
 
-	dst_reg->imm = dst_imm;
-out:
+	check_reg_overflow(dst_reg);
 	return 0;
 }
 
-static void check_reg_overflow(struct bpf_reg_state *reg)
+static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
+				      struct bpf_insn *insn,
+				      struct bpf_reg_state *dst_reg,
+				      struct bpf_reg_state *src_reg)
 {
-	if (reg->max_value > BPF_REGISTER_MAX_RANGE)
-		reg->max_value = BPF_REGISTER_MAX_RANGE;
-	if (reg->min_value < BPF_REGISTER_MIN_RANGE ||
-	    reg->min_value > BPF_REGISTER_MAX_RANGE)
-		reg->min_value = BPF_REGISTER_MIN_RANGE;
-}
-
-static u32 calc_align(u32 imm)
-{
-	if (!imm)
-		return 1U << 31;
-	return imm - ((imm - 1) & imm);
-}
-
-static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
-				    struct bpf_insn *insn)
-{
-	struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg;
+	struct bpf_reg_state *regs = env->cur_state.regs;
 	s64 min_val = BPF_REGISTER_MIN_RANGE;
 	u64 max_val = BPF_REGISTER_MAX_RANGE;
 	u8 opcode = BPF_OP(insn->code);
-	u32 dst_align, src_align;
+	bool src_known, dst_known;
 
-	dst_reg = &regs[insn->dst_reg];
-	src_align = 0;
-	if (BPF_SRC(insn->code) == BPF_X) {
-		check_reg_overflow(&regs[insn->src_reg]);
-		min_val = regs[insn->src_reg].min_value;
-		max_val = regs[insn->src_reg].max_value;
-
-		/* If the source register is a random pointer then the
-		 * min_value/max_value values represent the range of the known
-		 * accesses into that value, not the actual min/max value of the
-		 * register itself.  In this case we have to reset the reg range
-		 * values so we know it is not safe to look at.
-		 */
-		if (regs[insn->src_reg].type != CONST_IMM &&
-		    regs[insn->src_reg].type != UNKNOWN_VALUE) {
-			min_val = BPF_REGISTER_MIN_RANGE;
-			max_val = BPF_REGISTER_MAX_RANGE;
-			src_align = 0;
-		} else {
-			src_align = regs[insn->src_reg].min_align;
-		}
-	} else if (insn->imm < BPF_REGISTER_MAX_RANGE &&
-		   (s64)insn->imm > BPF_REGISTER_MIN_RANGE) {
-		min_val = max_val = insn->imm;
-		src_align = calc_align(insn->imm);
+	if (BPF_CLASS(insn->code) != BPF_ALU64) {
+		/* 32-bit ALU ops are (32,32)->64 */
+		coerce_reg_to_32(dst_reg);
+		coerce_reg_to_32(src_reg);
 	}
-
-	dst_align = dst_reg->min_align;
-
-	/* We don't know anything about what was done to this register, mark it
-	 * as unknown.
-	 */
-	if (min_val == BPF_REGISTER_MIN_RANGE &&
-	    max_val == BPF_REGISTER_MAX_RANGE) {
-		reset_reg_range_values(regs, insn->dst_reg);
-		return;
+	if (BPF_CLASS(insn->code) != BPF_ALU64) {
+		/* 32-bit ALU ops are (32,32)->64 */
+		coerce_reg_to_32(dst_reg);
+		coerce_reg_to_32(src_reg);
 	}
-
-	/* If one of our values was at the end of our ranges then we can't just
-	 * do our normal operations to the register, we need to set the values
-	 * to the min/max since they are undefined.
-	 */
-	if (min_val == BPF_REGISTER_MIN_RANGE)
-		dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
-	if (max_val == BPF_REGISTER_MAX_RANGE)
-		dst_reg->max_value = BPF_REGISTER_MAX_RANGE;
+	min_val = src_reg->min_value;
+	max_val = src_reg->max_value;
+	src_known = tnum_is_const(src_reg->var_off);
+	dst_known = tnum_is_const(dst_reg->var_off);
 
 	switch (opcode) {
 	case BPF_ADD:
+		if (min_val == BPF_REGISTER_MIN_RANGE)
+			dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
 		if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
 			dst_reg->min_value += min_val;
+		/* if max_val is MAX_RANGE, this will saturate dst->max */
 		if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
 			dst_reg->max_value += max_val;
-		dst_reg->min_align = min(src_align, dst_align);
+		dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg->var_off);
 		break;
 	case BPF_SUB:
+		if (max_val == BPF_REGISTER_MAX_RANGE)
+			dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
 		if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
-			dst_reg->min_value -= min_val;
+			dst_reg->min_value -= max_val;
+		if (min_val == BPF_REGISTER_MIN_RANGE)
+			dst_reg->max_value = BPF_REGISTER_MAX_RANGE;
 		if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
-			dst_reg->max_value -= max_val;
-		dst_reg->min_align = min(src_align, dst_align);
+			dst_reg->max_value -= min_val;
+		dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg->var_off);
 		break;
 	case BPF_MUL:
-		if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
-			dst_reg->min_value *= min_val;
+		if (min_val < 0 || dst_reg->min_value < 0) {
+			/* Ain't nobody got time to multiply that sign */
+			__mark_reg_unknown(dst_reg);
+			break;
+		}
+		dst_reg->min_value *= min_val;
+		/* if max_val is MAX_RANGE, this will saturate dst->max.
+		 * We know MAX_RANGE ** 2 won't overflow a u64, because
+		 * MAX_RANGE itself fits in a u32.
+		 */
+		BUILD_BUG_ON(BPF_REGISTER_MAX_RANGE > (u32)-1);
 		if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
 			dst_reg->max_value *= max_val;
-		dst_reg->min_align = max(src_align, dst_align);
+		dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg->var_off);
 		break;
 	case BPF_AND:
-		/* Disallow AND'ing of negative numbers, ain't nobody got time
-		 * for that.  Otherwise the minimum is 0 and the max is the max
-		 * value we could AND against.
+		if (src_known && dst_known) {
+			u64 value = dst_reg->var_off.value & src_reg->var_off.value;
+
+			dst_reg->var_off = tnum_const(value);
+			dst_reg->min_value = dst_reg->max_value = min_t(u64,
+					value, BPF_REGISTER_MAX_RANGE);
+			break;
+		}
+		/* Lose min_value when AND'ing negative numbers, ain't nobody
+		 * got time for that.  Otherwise we get our minimum from the
+		 * var_off, since that's inherently bitwise.
+		 * Our maximum is the minimum of the operands' maxima.
 		 */
-		if (min_val < 0)
+		dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg->var_off);
+		if (min_val < 0 && dst_reg->min_value < 0)
 			dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
 		else
-			dst_reg->min_value = 0;
-		dst_reg->max_value = max_val;
-		dst_reg->min_align = max(src_align, dst_align);
+			dst_reg->min_value = dst_reg->var_off.value;
+		dst_reg->max_value = min(dst_reg->max_value, max_val);
+		break;
+	case BPF_OR:
+		if (src_known && dst_known) {
+			u64 value = dst_reg->var_off.value | src_reg->var_off.value;
+
+			dst_reg->var_off = tnum_const(value);
+			dst_reg->min_value = dst_reg->max_value = min_t(u64,
+					value, BPF_REGISTER_MAX_RANGE);
+			break;
+		}
+		/* Lose ranges when OR'ing negative numbers, ain't nobody got
+		 * time for that.  Otherwise we get our maximum from the var_off,
+		 * and our minimum is the maximum of the operands' minima.
+		 */
+		dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg->var_off);
+		if (min_val < 0 || dst_reg->min_value < 0) {
+			dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
+			dst_reg->max_value = BPF_REGISTER_MAX_RANGE;
+		} else {
+			dst_reg->min_value = max(dst_reg->min_value, min_val);
+			dst_reg->max_value = dst_reg->var_off.value | dst_reg->var_off.mask;
+		}
 		break;
 	case BPF_LSH:
+		if (min_val < 0) {
+			/* LSH by a negative number is undefined */
+			mark_reg_unknown(regs, insn->dst_reg);
+			break;
+		}
 		/* Gotta have special overflow logic here, if we're shifting
 		 * more than MAX_RANGE then just assume we have an invalid
 		 * range.
 		 */
 		if (min_val > ilog2(BPF_REGISTER_MAX_RANGE)) {
 			dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
-			dst_reg->min_align = 1;
+			dst_reg->var_off = tnum_unknown;
 		} else {
 			if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
 				dst_reg->min_value <<= min_val;
-			if (!dst_reg->min_align)
-				dst_reg->min_align = 1;
-			dst_reg->min_align <<= min_val;
+			if (src_known)
+				dst_reg->var_off = tnum_lshift(dst_reg->var_off, min_val);
+			else
+				dst_reg->var_off = tnum_lshift(tnum_unknown, min_val);
 		}
 		if (max_val > ilog2(BPF_REGISTER_MAX_RANGE))
 			dst_reg->max_value = BPF_REGISTER_MAX_RANGE;
@@ -1848,37 +1919,138 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
 			dst_reg->max_value <<= max_val;
 		break;
 	case BPF_RSH:
-		/* RSH by a negative number is undefined, and the BPF_RSH is an
-		 * unsigned shift, so make the appropriate casts.
-		 */
-		if (min_val < 0 || dst_reg->min_value < 0) {
-			dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
+		if (min_val < 0) {
+			/* RSH by a negative number is undefined */
+			mark_reg_unknown(regs, insn->dst_reg);
+			break;
+		}
+		/* BPF_RSH is an unsigned shift, so make the appropriate casts */
+		if (dst_reg->min_value < 0) {
+			if (min_val)
+				/* Sign bit will be cleared */
+				dst_reg->min_value = 0;
 		} else {
 			dst_reg->min_value =
 				(u64)(dst_reg->min_value) >> min_val;
 		}
-		if (min_val < 0) {
-			dst_reg->min_align = 1;
-		} else {
-			dst_reg->min_align >>= (u64) min_val;
-			if (!dst_reg->min_align)
-				dst_reg->min_align = 1;
-		}
-		if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
-			dst_reg->max_value >>= max_val;
+		if (src_known)
+			dst_reg->var_off = tnum_rshift(dst_reg->var_off, min_val);
+		else
+			dst_reg->var_off = tnum_rshift(tnum_unknown, min_val);
+		if (dst_reg->max_value == BPF_REGISTER_MAX_RANGE)
+			dst_reg->max_value = ~0;
+		dst_reg->max_value >>= max_val;
 		break;
 	default:
-		reset_reg_range_values(regs, insn->dst_reg);
+		mark_reg_unknown(regs, insn->dst_reg);
 		break;
 	}
 
 	check_reg_overflow(dst_reg);
+	return 0;
+}
+
+/* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
+ * and var_off.
+ */
+static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
+				   struct bpf_insn *insn)
+{
+	struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg, *src_reg;
+	struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
+	u8 opcode = BPF_OP(insn->code);
+	int rc;
+
+	dst_reg = &regs[insn->dst_reg];
+	check_reg_overflow(dst_reg);
+	src_reg = NULL;
+	if (dst_reg->type != SCALAR_VALUE)
+		ptr_reg = dst_reg;
+	if (BPF_SRC(insn->code) == BPF_X) {
+		src_reg = &regs[insn->src_reg];
+		check_reg_overflow(src_reg);
+
+		if (src_reg->type != SCALAR_VALUE) {
+			if (dst_reg->type != SCALAR_VALUE) {
+				/* Combining two pointers by any ALU op yields
+				 * an arbitrary scalar.
+				 */
+				if (!env->allow_ptr_leaks) {
+					verbose("R%d pointer %s pointer prohibited\n",
+						insn->dst_reg,
+						bpf_alu_string[opcode >> 4]);
+					return -EACCES;
+				}
+				mark_reg_unknown(regs, insn->dst_reg);
+				return 0;
+			} else {
+				/* scalar += pointer
+				 * This is legal, but we have to reverse our
+				 * src/dest handling in computing the range
+				 */
+				rc = adjust_ptr_min_max_vals(env, insn,
+							     src_reg, dst_reg);
+				if (rc == -EACCES && env->allow_ptr_leaks) {
+					/* scalar += unknown scalar */
+					__mark_reg_unknown(&off_reg);
+					return adjust_scalar_min_max_vals(
+							env, insn,
+							dst_reg, &off_reg);
+				}
+				return rc;
+			}
+		} else if (ptr_reg) {
+			/* pointer += scalar */
+			rc = adjust_ptr_min_max_vals(env, insn,
+						     dst_reg, src_reg);
+			if (rc == -EACCES && env->allow_ptr_leaks) {
+				/* unknown scalar += scalar */
+				__mark_reg_unknown(dst_reg);
+				return adjust_scalar_min_max_vals(
+						env, insn, dst_reg, src_reg);
+			}
+			return rc;
+		}
+	} else {
+		/* Pretend the src is a reg with a known value, since we only
+		 * need to be able to read from this state.
+		 */
+		off_reg.type = SCALAR_VALUE;
+		off_reg.var_off = tnum_const(insn->imm);
+		off_reg.min_value = insn->imm;
+		off_reg.max_value = insn->imm;
+		src_reg = &off_reg;
+		if (ptr_reg) { /* pointer += K */
+			rc = adjust_ptr_min_max_vals(env, insn,
+						     ptr_reg, src_reg);
+			if (rc == -EACCES && env->allow_ptr_leaks) {
+				/* unknown scalar += K */
+				__mark_reg_unknown(dst_reg);
+				return adjust_scalar_min_max_vals(
+						env, insn, dst_reg, &off_reg);
+			}
+			return rc;
+		}
+	}
+
+	/* Got here implies adding two SCALAR_VALUEs */
+	if (WARN_ON_ONCE(ptr_reg)) {
+		print_verifier_state(&env->cur_state);
+		verbose("verifier internal error: unexpected ptr_reg\n");
+		return -EINVAL;
+	}
+	if (WARN_ON(!src_reg)) {
+		print_verifier_state(&env->cur_state);
+		verbose("verifier internal error: no src_reg\n");
+		return -EINVAL;
+	}
+	return adjust_scalar_min_max_vals(env, insn, dst_reg, src_reg);
 }
 
 /* check validity of 32-bit and 64-bit arithmetic operations */
 static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 {
-	struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg;
+	struct bpf_reg_state *regs = env->cur_state.regs;
 	u8 opcode = BPF_OP(insn->code);
 	int err;
 
@@ -1938,11 +2110,6 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 		if (err)
 			return err;
 
-		/* we are setting our register to something new, we need to
-		 * reset its range values.
-		 */
-		reset_reg_range_values(regs, insn->dst_reg);
-
 		if (BPF_SRC(insn->code) == BPF_X) {
 			if (BPF_CLASS(insn->code) == BPF_ALU64) {
 				/* case: R1 = R2
@@ -1950,23 +2117,29 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 				 */
 				regs[insn->dst_reg] = regs[insn->src_reg];
 			} else {
+				/* R1 = (u32) R2 */
 				if (is_pointer_value(env, insn->src_reg)) {
 					verbose("R%d partial copy of pointer\n",
 						insn->src_reg);
 					return -EACCES;
 				}
-				mark_reg_unknown_value(regs, insn->dst_reg);
+				mark_reg_unknown(regs, insn->dst_reg);
+				/* high 32 bits are known zero.  But this is
+				 * still out of range for max_value, so leave
+				 * that.
+				 */
+				regs[insn->dst_reg].var_off = tnum_cast(
+						regs[insn->dst_reg].var_off, 4);
 			}
 		} else {
 			/* case: R = imm
 			 * remember the value we stored into this reg
 			 */
-			regs[insn->dst_reg].type = CONST_IMM;
-			regs[insn->dst_reg].imm = insn->imm;
-			regs[insn->dst_reg].id = 0;
+			regs[insn->dst_reg].type = SCALAR_VALUE;
+			regs[insn->dst_reg].var_off = tnum_const(insn->imm);
 			regs[insn->dst_reg].max_value = insn->imm;
 			regs[insn->dst_reg].min_value = insn->imm;
-			regs[insn->dst_reg].min_align = calc_align(insn->imm);
+			regs[insn->dst_reg].id = 0;
 		}
 
 	} else if (opcode > BPF_END) {
@@ -2017,68 +2190,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 		if (err)
 			return err;
 
-		dst_reg = &regs[insn->dst_reg];
-
-		/* first we want to adjust our ranges. */
-		adjust_reg_min_max_vals(env, insn);
-
-		/* pattern match 'bpf_add Rx, imm' instruction */
-		if (opcode == BPF_ADD && BPF_CLASS(insn->code) == BPF_ALU64 &&
-		    dst_reg->type == FRAME_PTR && BPF_SRC(insn->code) == BPF_K) {
-			dst_reg->type = PTR_TO_STACK;
-			dst_reg->imm = insn->imm;
-			return 0;
-		} else if (opcode == BPF_ADD &&
-			   BPF_CLASS(insn->code) == BPF_ALU64 &&
-			   dst_reg->type == PTR_TO_STACK &&
-			   ((BPF_SRC(insn->code) == BPF_X &&
-			     regs[insn->src_reg].type == CONST_IMM) ||
-			    BPF_SRC(insn->code) == BPF_K)) {
-			if (BPF_SRC(insn->code) == BPF_X)
-				dst_reg->imm += regs[insn->src_reg].imm;
-			else
-				dst_reg->imm += insn->imm;
-			return 0;
-		} else if (opcode == BPF_ADD &&
-			   BPF_CLASS(insn->code) == BPF_ALU64 &&
-			   (dst_reg->type == PTR_TO_PACKET ||
-			    (BPF_SRC(insn->code) == BPF_X &&
-			     regs[insn->src_reg].type == PTR_TO_PACKET))) {
-			/* ptr_to_packet += K|X */
-			return check_packet_ptr_add(env, insn);
-		} else if (BPF_CLASS(insn->code) == BPF_ALU64 &&
-			   dst_reg->type == UNKNOWN_VALUE &&
-			   env->allow_ptr_leaks) {
-			/* unknown += K|X */
-			return evaluate_reg_alu(env, insn);
-		} else if (BPF_CLASS(insn->code) == BPF_ALU64 &&
-			   dst_reg->type == CONST_IMM &&
-			   env->allow_ptr_leaks) {
-			/* reg_imm += K|X */
-			return evaluate_reg_imm_alu(env, insn);
-		} else if (is_pointer_value(env, insn->dst_reg)) {
-			verbose("R%d pointer arithmetic prohibited\n",
-				insn->dst_reg);
-			return -EACCES;
-		} else if (BPF_SRC(insn->code) == BPF_X &&
-			   is_pointer_value(env, insn->src_reg)) {
-			verbose("R%d pointer arithmetic prohibited\n",
-				insn->src_reg);
-			return -EACCES;
-		}
-
-		/* If we did pointer math on a map value then just set it to our
-		 * PTR_TO_MAP_VALUE_ADJ type so we can deal with any stores or
-		 * loads to this register appropriately, otherwise just mark the
-		 * register as unknown.
-		 */
-		if (env->allow_ptr_leaks &&
-		    BPF_CLASS(insn->code) == BPF_ALU64 && opcode == BPF_ADD &&
-		    (dst_reg->type == PTR_TO_MAP_VALUE ||
-		     dst_reg->type == PTR_TO_MAP_VALUE_ADJ))
-			dst_reg->type = PTR_TO_MAP_VALUE_ADJ;
-		else
-			mark_reg_unknown_value(regs, insn->dst_reg);
+		return adjust_reg_min_max_vals(env, insn);
 	}
 
 	return 0;
@@ -2090,6 +2202,10 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
 	struct bpf_reg_state *regs = state->regs, *reg;
 	int i;
 
+	if (dst_reg->off < 0)
+		/* This doesn't give us any range */
+		return;
+
 	/* LLVM can generate two kind of checks:
 	 *
 	 * Type 1:
@@ -2123,20 +2239,21 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
 	for (i = 0; i < MAX_BPF_REG; i++)
 		if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id)
 			/* keep the maximum range already checked */
-			regs[i].range = max(regs[i].range, dst_reg->off);
+			regs[i].range = max_t(u32, regs[i].range, dst_reg->off);
 
 	for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
 		if (state->stack_slot_type[i] != STACK_SPILL)
 			continue;
 		reg = &state->spilled_regs[i / BPF_REG_SIZE];
 		if (reg->type == PTR_TO_PACKET && reg->id == dst_reg->id)
-			reg->range = max(reg->range, dst_reg->off);
+			reg->range = max_t(u32, reg->range, dst_reg->off);
 	}
 }
 
 /* Adjusts the register min/max values in the case that the dst_reg is the
  * variable register that we are working on, and src_reg is a constant or we're
  * simply doing a BPF_K check.
+ * In JEQ/JNE cases we also adjust the var_off values.
  */
 static void reg_set_min_max(struct bpf_reg_state *true_reg,
 			    struct bpf_reg_state *false_reg, u64 val,
@@ -2148,34 +2265,52 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg,
 		 * true then we know for sure.
 		 */
 		true_reg->max_value = true_reg->min_value = val;
+		true_reg->var_off = tnum_const(val);
 		break;
 	case BPF_JNE:
 		/* If this is true we know nothing Jon Snow, but if it is false
 		 * we know the value for sure;
 		 */
 		false_reg->max_value = false_reg->min_value = val;
+		false_reg->var_off = tnum_const(val);
 		break;
 	case BPF_JGT:
-		/* Unsigned comparison, the minimum value is 0. */
-		false_reg->min_value = 0;
-		/* fallthrough */
-	case BPF_JSGT:
-		/* If this is false then we know the maximum val is val,
-		 * otherwise we know the min val is val+1.
+		/* Unsigned comparison, can only tell us about max_value (since
+		 * min_value is signed), unless we learn sign bit.
 		 */
 		false_reg->max_value = val;
+		/* If we're not unsigned-greater-than a positive value, then
+		 * we can't be negative.
+		 */
+		if ((s64)val >= 0 && false_reg->min_value < 0)
+			false_reg->min_value = 0;
+		break;
+	case BPF_JSGT:
+		/* Signed comparison, can only tell us about min_value (since
+		 * max_value is unsigned), unless we already know sign bit.
+		 */
 		true_reg->min_value = val + 1;
+		/* If we're not signed-greater than val, and we're not negative,
+		 * then we can't be unsigned-greater than val either.
+		 */
+		if (false_reg->min_value >= 0)
+			false_reg->max_value = val;
 		break;
 	case BPF_JGE:
-		/* Unsigned comparison, the minimum value is 0. */
-		false_reg->min_value = 0;
-		/* fallthrough */
-	case BPF_JSGE:
-		/* If this is false then we know the maximum value is val - 1,
-		 * otherwise we know the mimimum value is val.
-		 */
 		false_reg->max_value = val - 1;
+		/* If we're not unsigned-ge a positive value, then we can't be
+		 * negative.
+		 */
+		if ((s64)val >= 0 && false_reg->min_value < 0)
+			false_reg->min_value = 0;
+		break;
+	case BPF_JSGE:
 		true_reg->min_value = val;
+		/* If we're not signed-ge val, and we're not negative, then we
+		 * can't be unsigned-ge val either.
+		 */
+		if (false_reg->min_value >= 0)
+			false_reg->max_value = val - 1;
 		break;
 	default:
 		break;
@@ -2185,8 +2320,8 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg,
 	check_reg_overflow(true_reg);
 }
 
-/* Same as above, but for the case that dst_reg is a CONST_IMM reg and src_reg
- * is the variable reg.
+/* Same as above, but for the case that dst_reg holds a constant and src_reg is
+ * the variable reg.
  */
 static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
 				struct bpf_reg_state *false_reg, u64 val,
@@ -2198,35 +2333,52 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
 		 * true then we know for sure.
 		 */
 		true_reg->max_value = true_reg->min_value = val;
+		true_reg->var_off = tnum_const(val);
 		break;
 	case BPF_JNE:
 		/* If this is true we know nothing Jon Snow, but if it is false
 		 * we know the value for sure;
 		 */
 		false_reg->max_value = false_reg->min_value = val;
+		false_reg->var_off = tnum_const(val);
 		break;
 	case BPF_JGT:
-		/* Unsigned comparison, the minimum value is 0. */
-		true_reg->min_value = 0;
-		/* fallthrough */
+		/* Unsigned comparison, can only tell us about max_value (since
+		 * min_value is signed), unless we learn sign bit.
+		 */
+		true_reg->max_value = val - 1;
+		/* If a positive value is unsigned-greater-than us, then we
+		 * can't be negative.
+		 */
+		if ((s64)val >= 0 && true_reg->min_value < 0)
+			true_reg->min_value = 0;
+		break;
 	case BPF_JSGT:
-		/*
-		 * If this is false, then the val is <= the register, if it is
-		 * true the register <= to the val.
+		/* Signed comparison, can only tell us about min_value (since
+		 * max_value is unsigned), unless we already know sign bit.
 		 */
 		false_reg->min_value = val;
-		true_reg->max_value = val - 1;
+		/* If val is signed-greater-than us, and we're not negative,
+		 * then val must be unsigned-greater-than us.
+		 */
+		if (true_reg->min_value >= 0)
+			true_reg->max_value = val - 1;
 		break;
 	case BPF_JGE:
-		/* Unsigned comparison, the minimum value is 0. */
-		true_reg->min_value = 0;
-		/* fallthrough */
-	case BPF_JSGE:
-		/* If this is false then constant < register, if it is true then
-		 * the register < constant.
+		true_reg->max_value = val;
+		/* If a positive value is unsigned-ge us, then we can't be
+		 * negative.
 		 */
+		if ((s64)val >= 0 && true_reg->min_value < 0)
+			true_reg->min_value = 0;
+		break;
+	case BPF_JSGE:
 		false_reg->min_value = val + 1;
-		true_reg->max_value = val;
+		/* If val is signed-ge us, and we're not negative, then val
+		 * must be unsigned-ge us.
+		 */
+		if (true_reg->min_value >= 0)
+			true_reg->max_value = val;
 		break;
 	default:
 		break;
@@ -2236,19 +2388,58 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
 	check_reg_overflow(true_reg);
 }
 
+/* Regs are known to be equal, so intersect their min/max/var_off */
+static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
+				  struct bpf_reg_state *dst_reg)
+{
+	src_reg->min_value = dst_reg->min_value = max(src_reg->min_value,
+						      dst_reg->min_value);
+	src_reg->max_value = dst_reg->max_value = min(src_reg->max_value,
+						      dst_reg->max_value);
+	src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
+							     dst_reg->var_off);
+	check_reg_overflow(src_reg);
+	check_reg_overflow(dst_reg);
+}
+
+static void reg_combine_min_max(struct bpf_reg_state *true_src,
+				struct bpf_reg_state *true_dst,
+				struct bpf_reg_state *false_src,
+				struct bpf_reg_state *false_dst,
+				u8 opcode)
+{
+	switch (opcode) {
+	case BPF_JEQ:
+		__reg_combine_min_max(true_src, true_dst);
+		break;
+	case BPF_JNE:
+		__reg_combine_min_max(false_src, false_dst);
+	}
+}
+
 static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id,
-			 enum bpf_reg_type type)
+			 bool is_null)
 {
 	struct bpf_reg_state *reg = &regs[regno];
 
 	if (reg->type == PTR_TO_MAP_VALUE_OR_NULL && reg->id == id) {
-		if (type == UNKNOWN_VALUE) {
-			__mark_reg_unknown_value(regs, regno);
+		/* Old offset (both fixed and variable parts) should
+		 * have been known-zero, because we don't allow pointer
+		 * arithmetic on pointers that might be NULL.
+		 */
+		if (WARN_ON_ONCE(reg->min_value || reg->max_value ||
+				 reg->var_off.value || reg->var_off.mask ||
+				 reg->off)) {
+			reg->min_value = reg->max_value = reg->off = 0;
+			reg->var_off = tnum_const(0);
+		}
+		if (is_null) {
+			reg->type = SCALAR_VALUE;
 		} else if (reg->map_ptr->inner_map_meta) {
 			reg->type = CONST_PTR_TO_MAP;
 			reg->map_ptr = reg->map_ptr->inner_map_meta;
 		} else {
-			reg->type = type;
+			reg->type = PTR_TO_MAP_VALUE;
 		}
 		/* We don't need id from this point onwards anymore, thus we
 		 * should better reset it, so that state pruning has chances
@@ -2262,19 +2453,19 @@ static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id,
  * be folded together at some point.
  */
 static void mark_map_regs(struct bpf_verifier_state *state, u32 regno,
-			  enum bpf_reg_type type)
+			  bool is_null)
 {
 	struct bpf_reg_state *regs = state->regs;
 	u32 id = regs[regno].id;
 	int i;
 
 	for (i = 0; i < MAX_BPF_REG; i++)
-		mark_map_reg(regs, i, id, type);
+		mark_map_reg(regs, i, id, is_null);
 
 	for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
 		if (state->stack_slot_type[i] != STACK_SPILL)
 			continue;
-		mark_map_reg(state->spilled_regs, i / BPF_REG_SIZE, id, type);
+		mark_map_reg(state->spilled_regs, i / BPF_REG_SIZE, id, is_null);
 	}
 }
 
@@ -2324,7 +2515,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
 	/* detect if R == 0 where R was initialized to zero earlier */
 	if (BPF_SRC(insn->code) == BPF_K &&
 	    (opcode == BPF_JEQ || opcode == BPF_JNE) &&
-	    dst_reg->type == CONST_IMM && dst_reg->imm == insn->imm) {
+	    dst_reg->type == SCALAR_VALUE &&
+	    tnum_equals_const(dst_reg->var_off, insn->imm)) {
 		if (opcode == BPF_JEQ) {
 			/* if (imm == imm) goto pc+off;
 			 * only follow the goto, ignore fall-through
@@ -2346,17 +2538,30 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
 
 	/* detect if we are comparing against a constant value so we can adjust
 	 * our min/max values for our dst register.
+	 * this is only legit if both are scalars (or pointers to the same
+	 * object, I suppose, but we don't support that right now), because
+	 * otherwise the different base pointers mean the offsets aren't
+	 * comparable.
 	 */
 	if (BPF_SRC(insn->code) == BPF_X) {
-		if (regs[insn->src_reg].type == CONST_IMM)
-			reg_set_min_max(&other_branch->regs[insn->dst_reg],
-					dst_reg, regs[insn->src_reg].imm,
-					opcode);
-		else if (dst_reg->type == CONST_IMM)
-			reg_set_min_max_inv(&other_branch->regs[insn->src_reg],
-					    &regs[insn->src_reg], dst_reg->imm,
-					    opcode);
-	} else {
+		if (dst_reg->type == SCALAR_VALUE &&
+		    regs[insn->src_reg].type == SCALAR_VALUE) {
+			if (tnum_is_const(regs[insn->src_reg].var_off))
+				reg_set_min_max(&other_branch->regs[insn->dst_reg],
+						dst_reg, regs[insn->src_reg].var_off.value,
+						opcode);
+			else if (tnum_is_const(dst_reg->var_off))
+				reg_set_min_max_inv(&other_branch->regs[insn->src_reg],
+						    &regs[insn->src_reg],
+						    dst_reg->var_off.value, opcode);
+			else if (opcode == BPF_JEQ || opcode == BPF_JNE)
+				/* Comparing for equality, we can combine knowledge */
+				reg_combine_min_max(&other_branch->regs[insn->src_reg],
+						    &other_branch->regs[insn->dst_reg],
+						    &regs[insn->src_reg],
+						    &regs[insn->dst_reg], opcode);
+		}
+	} else if (dst_reg->type == SCALAR_VALUE) {
 		reg_set_min_max(&other_branch->regs[insn->dst_reg],
 					dst_reg, insn->imm, opcode);
 	}
@@ -2368,10 +2573,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
 		/* Mark all identical map registers in each branch as either
 		 * safe or unknown depending R == 0 or R != 0 conditional.
 		 */
-		mark_map_regs(this_branch, insn->dst_reg,
-			      opcode == BPF_JEQ ? PTR_TO_MAP_VALUE : UNKNOWN_VALUE);
-		mark_map_regs(other_branch, insn->dst_reg,
-			      opcode == BPF_JEQ ? UNKNOWN_VALUE : PTR_TO_MAP_VALUE);
+		mark_map_regs(this_branch, insn->dst_reg, opcode == BPF_JNE);
+		mark_map_regs(other_branch, insn->dst_reg, opcode == BPF_JEQ);
 	} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT &&
 		   dst_reg->type == PTR_TO_PACKET &&
 		   regs[insn->src_reg].type == PTR_TO_PACKET_END) {
@@ -2419,8 +2622,11 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
 	if (insn->src_reg == 0) {
 		u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
 
-		regs[insn->dst_reg].type = CONST_IMM;
-		regs[insn->dst_reg].imm = imm;
+		regs[insn->dst_reg].type = SCALAR_VALUE;
+		regs[insn->dst_reg].min_value = imm;
+		regs[insn->dst_reg].max_value = imm;
+		check_reg_overflow(&regs[insn->dst_reg]);
+		regs[insn->dst_reg].var_off = tnum_const(imm);
 		regs[insn->dst_reg].id = 0;
 		return 0;
 	}
@@ -2502,7 +2708,7 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
 	/* mark destination R0 register as readable, since it contains
 	 * the value fetched from the packet
 	 */
-	regs[BPF_REG_0].type = UNKNOWN_VALUE;
+	mark_reg_unknown(regs, BPF_REG_0);
 	return 0;
 }
 
@@ -2705,57 +2911,102 @@ static int check_cfg(struct bpf_verifier_env *env)
 	return ret;
 }
 
-/* the following conditions reduce the number of explored insns
- * from ~140k to ~80k for ultra large programs that use a lot of ptr_to_packet
- */
-static bool compare_ptrs_to_packet(struct bpf_verifier_env *env,
-				   struct bpf_reg_state *old,
-				   struct bpf_reg_state *cur)
+/* check %cur's range satisfies %old's */
+static bool range_within(struct bpf_reg_state *old,
+			 struct bpf_reg_state *cur)
 {
-	if (old->id != cur->id)
-		return false;
+	return old->min_value <= cur->min_value &&
+	       old->max_value >= cur->max_value;
+}
 
-	/* old ptr_to_packet is more conservative, since it allows smaller
-	 * range. Ex:
-	 * old(off=0,r=10) is equal to cur(off=0,r=20), because
-	 * old(off=0,r=10) means that with range=10 the verifier proceeded
-	 * further and found no issues with the program. Now we're in the same
-	 * spot with cur(off=0,r=20), so we're safe too, since anything further
-	 * will only be looking at most 10 bytes after this pointer.
-	 */
-	if (old->off == cur->off && old->range < cur->range)
+/* Returns true if (rold safe implies rcur safe) */
+static bool regsafe(struct bpf_reg_state *rold,
+		    struct bpf_reg_state *rcur,
+		    bool varlen_map_access)
+{
+	if (memcmp(rold, rcur, sizeof(*rold)) == 0)
 		return true;
 
-	/* old(off=20,r=10) is equal to cur(off=22,re=22 or 5 or 0)
-	 * since both cannot be used for packet access and safe(old)
-	 * pointer has smaller off that could be used for further
-	 * 'if (ptr > data_end)' check
-	 * Ex:
-	 * old(off=20,r=10) and cur(off=22,r=22) and cur(off=22,r=0) mean
-	 * that we cannot access the packet.
-	 * The safe range is:
-	 * [ptr, ptr + range - off)
-	 * so whenever off >=range, it means no safe bytes from this pointer.
-	 * When comparing old->off <= cur->off, it means that older code
-	 * went with smaller offset and that offset was later
-	 * used to figure out the safe range after 'if (ptr > data_end)' check
-	 * Say, 'old' state was explored like:
-	 * ... R3(off=0, r=0)
-	 * R4 = R3 + 20
-	 * ... now R4(off=20,r=0)  <-- here
-	 * if (R4 > data_end)
-	 * ... R4(off=20,r=20), R3(off=0,r=20) and R3 can be used to access.
-	 * ... the code further went all the way to bpf_exit.
-	 * Now the 'cur' state at the mark 'here' has R4(off=30,r=0).
-	 * old_R4(off=20,r=0) equal to cur_R4(off=30,r=0), since if the verifier
-	 * goes further, such cur_R4 will give larger safe packet range after
-	 * 'if (R4 > data_end)' and all further insn were already good with r=20,
-	 * so they will be good with r=30 and we can prune the search.
-	 */
-	if (!env->strict_alignment && old->off <= cur->off &&
-	    old->off >= old->range && cur->off >= cur->range)
+	if (rold->type == NOT_INIT)
+		/* explored state can't have used this */
 		return true;
+	if (rcur->type == NOT_INIT)
+		return false;
+	switch (rold->type) {
+	case SCALAR_VALUE:
+		if (rcur->type == SCALAR_VALUE) {
+			/* new val must satisfy old val knowledge */
+			return range_within(rold, rcur) &&
+			       tnum_in(rold->var_off, rcur->var_off);
+		} else {
+			/* if we knew anything about the old value, we're not
+			 * equal, because we can't know anything about the
+			 * scalar value of the pointer in the new value.
+			 */
+			return rold->min_value == BPF_REGISTER_MIN_RANGE &&
+			       rold->max_value == BPF_REGISTER_MAX_RANGE &&
+			       tnum_is_unknown(rold->var_off);
+		}
+	case PTR_TO_MAP_VALUE:
+		if (varlen_map_access) {
+			/* If the new min/max/var_off satisfy the old ones and
+			 * everything else matches, we are OK.
+			 * We don't care about the 'id' value, because nothing
+			 * uses it for PTR_TO_MAP_VALUE (only for ..._OR_NULL)
+			 */
+			return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
+			       range_within(rold, rcur) &&
+			       tnum_in(rold->var_off, rcur->var_off);
+		} else {
+			/* If the ranges/var_off were not the same, but
+			 * everything else was and we didn't do a variable
+			 * access into a map then we are a-ok.
+			 */
+			return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0;
+		}
+	case PTR_TO_MAP_VALUE_OR_NULL:
+		/* a PTR_TO_MAP_VALUE with no offset (fixed or variable) can
+		 * safely be used as a PTR_TO_MAP_VALUE_OR_NULL into the same
+		 * map.  (We can't do the same thing for a CONST_PTR_TO_MAP,
+		 * because its map_ptr changed when we NULL-checked it.)
+		 */
+		return rcur->type == PTR_TO_MAP_VALUE &&
+		       rcur->map_ptr == rold->map_ptr &&
+		       tnum_equals_const(rcur->var_off, 0) &&
+		       rcur->off == 0;
+	case PTR_TO_PACKET:
+		if (rcur->type != PTR_TO_PACKET)
+			return false;
+		/* We must have at least as much range as the old ptr
+		 * did, so that any accesses which were safe before are
+		 * still safe.  This is true even if old range < old off,
+		 * since someone could have accessed through (ptr - k), or
+		 * even done ptr -= k in a register, to get a safe access.
+		 */
+		if (rold->range > rcur->range)
+			return false;
+		/* If the offsets don't match, we can't trust our alignment;
+		 * nor can we be sure that we won't fall out of range.
+		 */
+		if (rold->off != rcur->off)
+			return false;
+		/* new val must satisfy old val knowledge */
+		return range_within(rold, rcur) &&
+		       tnum_in(rold->var_off, rcur->var_off);
+	case PTR_TO_CTX:
+	case CONST_PTR_TO_MAP:
+	case PTR_TO_STACK:
+	case PTR_TO_PACKET_END:
+		/* Only valid matches are exact, which memcmp() above
+		 * would have accepted
+		 */
+	default:
+		/* Don't know what's going on, just say it's not safe */
+		return false;
+	}
 
+	/* Shouldn't get here; if we do, say it's not safe */
+	WARN_ON_ONCE(1);
 	return false;
 }
 
@@ -2790,43 +3041,11 @@ static bool states_equal(struct bpf_verifier_env *env,
 			 struct bpf_verifier_state *cur)
 {
 	bool varlen_map_access = env->varlen_map_value_access;
-	struct bpf_reg_state *rold, *rcur;
 	int i;
 
 	for (i = 0; i < MAX_BPF_REG; i++) {
-		rold = &old->regs[i];
-		rcur = &cur->regs[i];
-
-		if (memcmp(rold, rcur, sizeof(*rold)) == 0)
-			continue;
-
-		/* If the ranges were not the same, but everything else was and
-		 * we didn't do a variable access into a map then we are a-ok.
-		 */
-		if (!varlen_map_access &&
-		    memcmp(rold, rcur, offsetofend(struct bpf_reg_state, id)) == 0)
-			continue;
-
-		/* If we didn't map access then again we don't care about the
-		 * mismatched range values and it's ok if our old type was
-		 * UNKNOWN and we didn't go to a NOT_INIT'ed reg.
-		 */
-		if (rold->type == NOT_INIT ||
-		    (!varlen_map_access && rold->type == UNKNOWN_VALUE &&
-		     rcur->type != NOT_INIT))
-			continue;
-
-		/* Don't care about the reg->id in this case. */
-		if (rold->type == PTR_TO_MAP_VALUE_OR_NULL &&
-		    rcur->type == PTR_TO_MAP_VALUE_OR_NULL &&
-		    rold->map_ptr == rcur->map_ptr)
-			continue;
-
-		if (rold->type == PTR_TO_PACKET && rcur->type == PTR_TO_PACKET &&
-		    compare_ptrs_to_packet(env, rold, rcur))
-			continue;
-
-		return false;
+		if (!regsafe(&old->regs[i], &cur->regs[i], varlen_map_access))
+			return false;
 	}
 
 	for (i = 0; i < MAX_BPF_STACK; i++) {
@@ -2843,16 +3062,16 @@ static bool states_equal(struct bpf_verifier_env *env,
 			continue;
 		if (old->stack_slot_type[i] != STACK_SPILL)
 			continue;
-		if (memcmp(&old->spilled_regs[i / BPF_REG_SIZE],
-			   &cur->spilled_regs[i / BPF_REG_SIZE],
-			   sizeof(old->spilled_regs[0])))
-			/* when explored and current stack slot types are
-			 * the same, check that stored pointers types
+		if (!regsafe(&old->spilled_regs[i / BPF_REG_SIZE],
+			     &cur->spilled_regs[i / BPF_REG_SIZE],
+			     varlen_map_access))
+			/* when explored and current stack slot are both storing
+			 * spilled registers, check that stored pointers types
 			 * are the same as well.
 			 * Ex: explored safe path could have stored
-			 * (bpf_reg_state) {.type = PTR_TO_STACK, .imm = -8}
+			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
 			 * but current path has stored:
-			 * (bpf_reg_state) {.type = PTR_TO_STACK, .imm = -16}
+			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
 			 * such verifier states are not equivalent.
 			 * return false to continue verification of this path
 			 */
@@ -3174,7 +3393,6 @@ static int do_check(struct bpf_verifier_env *env)
 				verbose("invalid BPF_LD mode\n");
 				return -EINVAL;
 			}
-			reset_reg_range_values(regs, insn->dst_reg);
 		} else {
 			verbose("unknown insn class %d\n", class);
 			return -EINVAL;

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ