Message-Id: <20190130140251.23784-1-m@lambda.lt>
Date: Wed, 30 Jan 2019 15:02:51 +0100
From: Martynas Pumputis <m@...bda.lt>
To: netdev@...r.kernel.org
Cc: ast@...nel.org, daniel@...earbox.net, m@...bda.lt
Subject: [PATCH bpf-next] bpf: add optional memory accounting for maps
Previously, memory allocated for a map was not accounted for. Therefore,
this memory could not be taken into consideration by the cgroup
memory controller.
This patch introduces the "BPF_F_ACCOUNT_MEM" flag, which enables
memory accounting for a map. The flag can be set in "map_flags"
during map creation ("BPF_MAP_CREATE").

When enabled, we account only the memory which is charged against
the "RLIMIT_MEMLOCK" limit.
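For illustration, a minimal sketch of creating a map with the flag set
via the raw bpf(2) syscall (assuming a uapi header which already defines
"BPF_F_ACCOUNT_MEM"; the test program below does the same via libbpf's
bpf_create_map()):

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int create_accounted_map(void)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_type    = BPF_MAP_TYPE_HASH;
	attr.key_size    = 8;
	attr.value_size  = 16;
	attr.max_entries = 65536;
	attr.map_flags   = BPF_F_ACCOUNT_MEM; /* opt in to memcg accounting */

	/* Returns a new map fd on success, or -1 with errno set. */
	return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
}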
To validate the change, we first create the memory cgroup "test-map":
# mkdir /sys/fs/cgroup/memory/test-map
And then we run the following program against the cgroup:
$ cat test_map.c
<..>
int main() {
	usleep(3 * 1000000);
	assert(bpf_create_map(BPF_MAP_TYPE_HASH, 8, 16, 65536, 0) > 0);
	usleep(3 * 1000000);
}
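(bpf_create_map() above comes from libbpf in tools/lib/bpf; it returns
the new map's fd on success, hence the "> 0" assertion.)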
# cgexec -g memory:test-map ./test_map &
# cat /sys/fs/cgroup/memory/test-map/memory{,.kmem}.usage_in_bytes
397312
258048
<after 3 sec the map has been created>
# bpftool map list
19: hash flags 0x0
	key 8B value 16B max_entries 65536 memlock 5771264B
# cat /sys/fs/cgroup/memory/test-map/memory{,.kmem}.usage_in_bytes
401408
262144
As we can see, the memory allocated for the map is not accounted for:
the cgroup's usage grew by only 4096B, i.e.
397312B + 5771264B > 401408B.

Next, we enable the accounting and re-run the test:
$ cat test_map.c
<..>
int main() {
	usleep(3 * 1000000);
	assert(bpf_create_map(BPF_MAP_TYPE_HASH, 8, 16, 65536, BPF_F_ACCOUNT_MEM) > 0);
	usleep(3 * 1000000);
}
# cgexec -g memory:test-map ./test_map &
# cat /sys/fs/cgroup/memory/test-map/memory{,.kmem}.usage_in_bytes
450560
307200
<after 3 sec the map has been created>
# bpftool map list
20: hash flags 0x80
	key 8B value 16B max_entries 65536 memlock 5771264B
# cat /sys/fs/cgroup/memory/test-map/memory{,.kmem}.usage_in_bytes
6221824
6078464
This time, the memory (including kmem) is accounted for, as
450560B + 5771264B <= 6221824B (the usage grew by exactly the map's
5771264B).
Signed-off-by: Martynas Pumputis <m@...bda.lt>
---
include/linux/bpf.h | 5 +++--
include/uapi/linux/bpf.h | 2 ++
kernel/bpf/arraymap.c | 14 +++++++++-----
kernel/bpf/bpf_lru_list.c | 11 +++++++++--
kernel/bpf/bpf_lru_list.h | 1 +
kernel/bpf/cpumap.c | 12 +++++++++---
kernel/bpf/devmap.c | 10 ++++++++--
kernel/bpf/hashtab.c | 19 ++++++++++++++-----
kernel/bpf/lpm_trie.c | 19 +++++++++++++------
kernel/bpf/queue_stack_maps.c | 5 +++--
kernel/bpf/reuseport_array.c | 3 ++-
kernel/bpf/stackmap.c | 12 ++++++++----
kernel/bpf/syscall.c | 12 ++++++++----
kernel/bpf/xskmap.c | 9 +++++++--
net/core/sock_map.c | 13 +++++++++----
tools/include/uapi/linux/bpf.h | 3 +++
16 files changed, 108 insertions(+), 42 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e734f163bd0b..353a3f4304fe 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -79,7 +79,8 @@ struct bpf_map {
u32 btf_value_type_id;
struct btf *btf;
bool unpriv_array;
- /* 55 bytes hole */
+ bool account_mem;
+ /* 54 bytes hole */
/* The 3rd and 4th cacheline with misc members to avoid false sharing
* particularly with refcounting.
@@ -506,7 +507,7 @@ void bpf_map_put(struct bpf_map *map);
int bpf_map_precharge_memlock(u32 pages);
int bpf_map_charge_memlock(struct bpf_map *map, u32 pages);
void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages);
-void *bpf_map_area_alloc(size_t size, int numa_node);
+void *bpf_map_area_alloc(size_t size, int numa_node, bool account_mem);
void bpf_map_area_free(void *base);
void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 91c43884f295..a374ccbaa51b 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -278,6 +278,8 @@ enum bpf_attach_type {
#define BPF_F_NO_COMMON_LRU (1U << 1)
/* Specify numa node during map creation */
#define BPF_F_NUMA_NODE (1U << 2)
+/* Enable memory accounting for map */
+#define BPF_F_ACCOUNT_MEM (1U << 7)
#define BPF_OBJ_NAME_LEN 16U
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 25632a75d630..86417f2e6f1b 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -34,14 +34,17 @@ static void bpf_array_free_percpu(struct bpf_array *array)
}
}
-static int bpf_array_alloc_percpu(struct bpf_array *array)
+static int bpf_array_alloc_percpu(struct bpf_array *array, bool account_mem)
{
void __percpu *ptr;
+ gfp_t gfp = GFP_USER | __GFP_NOWARN;
int i;
+ if (account_mem)
+ gfp |= __GFP_ACCOUNT;
+
for (i = 0; i < array->map.max_entries; i++) {
- ptr = __alloc_percpu_gfp(array->elem_size, 8,
- GFP_USER | __GFP_NOWARN);
+ ptr = __alloc_percpu_gfp(array->elem_size, 8, gfp);
if (!ptr) {
bpf_array_free_percpu(array);
return -ENOMEM;
@@ -82,6 +85,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
u32 elem_size, index_mask, max_entries;
bool unpriv = !capable(CAP_SYS_ADMIN);
u64 cost, array_size, mask64;
+ bool account_mem = (attr->map_flags & BPF_F_ACCOUNT_MEM);
struct bpf_array *array;
elem_size = round_up(attr->value_size, 8);
@@ -129,7 +133,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
return ERR_PTR(ret);
/* allocate all map elements and zero-initialize them */
- array = bpf_map_area_alloc(array_size, numa_node);
+ array = bpf_map_area_alloc(array_size, numa_node, account_mem);
if (!array)
return ERR_PTR(-ENOMEM);
array->index_mask = index_mask;
@@ -140,7 +144,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
array->map.pages = cost;
array->elem_size = elem_size;
- if (percpu && bpf_array_alloc_percpu(array)) {
+ if (percpu && bpf_array_alloc_percpu(array, account_mem)) {
bpf_map_area_free(array);
return ERR_PTR(-ENOMEM);
}
diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c
index e6ef4401a138..4d58537e0af2 100644
--- a/kernel/bpf/bpf_lru_list.c
+++ b/kernel/bpf/bpf_lru_list.c
@@ -7,6 +7,7 @@
#include <linux/cpumask.h>
#include <linux/spinlock.h>
#include <linux/percpu.h>
+#include <linux/gfp.h>
#include "bpf_lru_list.h"
@@ -646,12 +647,17 @@ static void bpf_lru_list_init(struct bpf_lru_list *l)
}
int bpf_lru_init(struct bpf_lru *lru, bool percpu, u32 hash_offset,
+ bool account_mem,
del_from_htab_func del_from_htab, void *del_arg)
{
+ gfp_t gfp = GFP_KERNEL;
int cpu;
+ if (account_mem)
+ gfp |= __GFP_ACCOUNT;
+
if (percpu) {
- lru->percpu_lru = alloc_percpu(struct bpf_lru_list);
+ lru->percpu_lru = alloc_percpu_gfp(struct bpf_lru_list, gfp);
if (!lru->percpu_lru)
return -ENOMEM;
@@ -665,7 +671,8 @@ int bpf_lru_init(struct bpf_lru *lru, bool percpu, u32 hash_offset,
} else {
struct bpf_common_lru *clru = &lru->common_lru;
- clru->local_list = alloc_percpu(struct bpf_lru_locallist);
+ clru->local_list = alloc_percpu_gfp(struct bpf_lru_locallist,
+ gfp);
if (!clru->local_list)
return -ENOMEM;
diff --git a/kernel/bpf/bpf_lru_list.h b/kernel/bpf/bpf_lru_list.h
index 7d4f89b7cb84..89566665592b 100644
--- a/kernel/bpf/bpf_lru_list.h
+++ b/kernel/bpf/bpf_lru_list.h
@@ -74,6 +74,7 @@ static inline void bpf_lru_node_set_ref(struct bpf_lru_node *node)
}
int bpf_lru_init(struct bpf_lru *lru, bool percpu, u32 hash_offset,
+ bool account_mem,
del_from_htab_func del_from_htab, void *delete_arg);
void bpf_lru_populate(struct bpf_lru *lru, void *buf, u32 node_offset,
u32 elem_size, u32 nr_elems);
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 8974b3755670..1e84bf78716e 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -81,6 +81,8 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
struct bpf_cpu_map *cmap;
int err = -ENOMEM;
u64 cost;
+ bool account_mem = (attr->map_flags & BPF_F_ACCOUNT_MEM);
+ gfp_t gfp = GFP_KERNEL;
int ret;
if (!capable(CAP_SYS_ADMIN))
@@ -117,16 +119,20 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
goto free_cmap;
}
+ if (account_mem)
+ gfp |= __GFP_ACCOUNT;
/* A per cpu bitfield with a bit per possible CPU in map */
- cmap->flush_needed = __alloc_percpu(cpu_map_bitmap_size(attr),
- __alignof__(unsigned long));
+ cmap->flush_needed = __alloc_percpu_gfp(cpu_map_bitmap_size(attr),
+ __alignof__(unsigned long),
+ gfp);
if (!cmap->flush_needed)
goto free_cmap;
/* Alloc array for possible remote "destination" CPUs */
cmap->cpu_map = bpf_map_area_alloc(cmap->map.max_entries *
sizeof(struct bpf_cpu_map_entry *),
- cmap->map.numa_node);
+ cmap->map.numa_node,
+ account_mem);
if (!cmap->cpu_map)
goto free_percpu;
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 191b79948424..acfc1b35aa51 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -90,6 +90,8 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
struct bpf_dtab *dtab;
int err = -EINVAL;
u64 cost;
+ bool account_mem = (attr->map_flags & BPF_F_ACCOUNT_MEM);
+ gfp_t gfp;
if (!capable(CAP_NET_ADMIN))
return ERR_PTR(-EPERM);
@@ -120,16 +122,20 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
err = -ENOMEM;
+ gfp = GFP_KERNEL | __GFP_NOWARN;
+ if (account_mem)
+ gfp |= __GFP_ACCOUNT;
/* A per cpu bitfield with a bit per possible net device */
dtab->flush_needed = __alloc_percpu_gfp(dev_map_bitmap_size(attr),
__alignof__(unsigned long),
- GFP_KERNEL | __GFP_NOWARN);
+ gfp);
if (!dtab->flush_needed)
goto free_dtab;
dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries *
sizeof(struct bpf_dtab_netdev *),
- dtab->map.numa_node);
+ dtab->map.numa_node,
+ account_mem);
if (!dtab->netdev_map)
goto free_dtab;
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 4b7c76765d9d..fc2f44451256 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -23,6 +23,7 @@
#define HTAB_CREATE_FLAG_MASK \
(BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE | \
+ BPF_F_ACCOUNT_MEM | \
BPF_F_RDONLY | BPF_F_WRONLY | BPF_F_ZERO_SEED)
struct bucket {
@@ -139,27 +140,32 @@ static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key,
return NULL;
}
-static int prealloc_init(struct bpf_htab *htab)
+static int prealloc_init(struct bpf_htab *htab, bool account_mem)
{
u32 num_entries = htab->map.max_entries;
+ gfp_t gfp = GFP_USER | __GFP_NOWARN;
int err = -ENOMEM, i;
if (!htab_is_percpu(htab) && !htab_is_lru(htab))
num_entries += num_possible_cpus();
htab->elems = bpf_map_area_alloc(htab->elem_size * num_entries,
- htab->map.numa_node);
+ htab->map.numa_node,
+ account_mem);
if (!htab->elems)
return -ENOMEM;
if (!htab_is_percpu(htab))
goto skip_percpu_elems;
+ if (account_mem)
+ gfp |= __GFP_ACCOUNT;
+
for (i = 0; i < num_entries; i++) {
u32 size = round_up(htab->map.value_size, 8);
void __percpu *pptr;
- pptr = __alloc_percpu_gfp(size, 8, GFP_USER | __GFP_NOWARN);
+ pptr = __alloc_percpu_gfp(size, 8, gfp);
if (!pptr)
goto free_elems;
htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size,
@@ -173,6 +179,7 @@ static int prealloc_init(struct bpf_htab *htab)
htab->map.map_flags & BPF_F_NO_COMMON_LRU,
offsetof(struct htab_elem, hash) -
offsetof(struct htab_elem, lru_node),
+ account_mem,
htab_lru_map_delete_node,
htab);
else
@@ -313,6 +320,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
*/
bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
+ bool account_mem = (attr->map_flags & BPF_F_ACCOUNT_MEM);
struct bpf_htab *htab;
int err, i;
u64 cost;
@@ -374,7 +382,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
err = -ENOMEM;
htab->buckets = bpf_map_area_alloc(htab->n_buckets *
sizeof(struct bucket),
- htab->map.numa_node);
+ htab->map.numa_node,
+ account_mem);
if (!htab->buckets)
goto free_htab;
@@ -389,7 +398,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
}
if (prealloc) {
- err = prealloc_init(htab);
+ err = prealloc_init(htab, account_mem);
if (err)
goto free_buckets;
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index abf1002080df..8421fdb816f3 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -277,16 +277,19 @@ static void *trie_lookup_elem(struct bpf_map *map, void *_key)
}
static struct lpm_trie_node *lpm_trie_node_alloc(const struct lpm_trie *trie,
- const void *value)
+ const void *value,
+ bool account_mem)
{
struct lpm_trie_node *node;
size_t size = sizeof(struct lpm_trie_node) + trie->data_size;
+ gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
if (value)
size += trie->map.value_size;
- node = kmalloc_node(size, GFP_ATOMIC | __GFP_NOWARN,
- trie->map.numa_node);
+ if (account_mem)
+ gfp |= __GFP_ACCOUNT;
+ node = kmalloc_node(size, gfp, trie->map.numa_node);
if (!node)
return NULL;
@@ -327,7 +330,7 @@ static int trie_update_elem(struct bpf_map *map,
goto out;
}
- new_node = lpm_trie_node_alloc(trie, value);
+ new_node = lpm_trie_node_alloc(trie, value, map->account_mem);
if (!new_node) {
ret = -ENOMEM;
goto out;
@@ -394,7 +397,7 @@ static int trie_update_elem(struct bpf_map *map,
goto out;
}
- im_node = lpm_trie_node_alloc(trie, NULL);
+ im_node = lpm_trie_node_alloc(trie, NULL, map->account_mem);
if (!im_node) {
ret = -ENOMEM;
goto out;
@@ -542,6 +545,8 @@ static int trie_delete_elem(struct bpf_map *map, void *_key)
static struct bpf_map *trie_alloc(union bpf_attr *attr)
{
struct lpm_trie *trie;
+ bool account_mem = (attr->map_flags & BPF_F_ACCOUNT_MEM);
+ gfp_t gfp = GFP_USER | __GFP_NOWARN;
u64 cost = sizeof(*trie), cost_per_node;
int ret;
@@ -558,7 +563,9 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
attr->value_size > LPM_VAL_SIZE_MAX)
return ERR_PTR(-EINVAL);
- trie = kzalloc(sizeof(*trie), GFP_USER | __GFP_NOWARN);
+ if (account_mem)
+ gfp |= __GFP_ACCOUNT;
+ trie = kzalloc(sizeof(*trie), gfp);
if (!trie)
return ERR_PTR(-ENOMEM);
diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c
index b384ea9f3254..040ec350af3d 100644
--- a/kernel/bpf/queue_stack_maps.c
+++ b/kernel/bpf/queue_stack_maps.c
@@ -11,7 +11,7 @@
#include "percpu_freelist.h"
#define QUEUE_STACK_CREATE_FLAG_MASK \
- (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
+ (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY | BPF_F_ACCOUNT_MEM)
struct bpf_queue_stack {
@@ -69,6 +69,7 @@ static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)
int ret, numa_node = bpf_map_attr_numa_node(attr);
struct bpf_queue_stack *qs;
u64 size, queue_size, cost;
+ bool account_mem = (attr->map_flags & BPF_F_ACCOUNT_MEM);
size = (u64) attr->max_entries + 1;
cost = queue_size = sizeof(*qs) + size * attr->value_size;
@@ -81,7 +82,7 @@ static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)
if (ret < 0)
return ERR_PTR(ret);
- qs = bpf_map_area_alloc(queue_size, numa_node);
+ qs = bpf_map_area_alloc(queue_size, numa_node, account_mem);
if (!qs)
return ERR_PTR(-ENOMEM);
diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c
index 18e225de80ff..a9a2709c7507 100644
--- a/kernel/bpf/reuseport_array.c
+++ b/kernel/bpf/reuseport_array.c
@@ -152,6 +152,7 @@ static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr)
int err, numa_node = bpf_map_attr_numa_node(attr);
struct reuseport_array *array;
u64 cost, array_size;
+ bool account_mem = (attr->map_flags & BPF_F_ACCOUNT_MEM);
if (!capable(CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
@@ -170,7 +171,7 @@ static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr)
return ERR_PTR(err);
/* allocate all map elements and zero-initialize them */
- array = bpf_map_area_alloc(array_size, numa_node);
+ array = bpf_map_area_alloc(array_size, numa_node, account_mem);
if (!array)
return ERR_PTR(-ENOMEM);
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index d43b14535827..46d37c7e09a2 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -61,13 +61,15 @@ static inline int stack_map_data_size(struct bpf_map *map)
sizeof(struct bpf_stack_build_id) : sizeof(u64);
}
-static int prealloc_elems_and_freelist(struct bpf_stack_map *smap)
+static int prealloc_elems_and_freelist(struct bpf_stack_map *smap,
+ bool account_mem)
{
u32 elem_size = sizeof(struct stack_map_bucket) + smap->map.value_size;
int err;
smap->elems = bpf_map_area_alloc(elem_size * smap->map.max_entries,
- smap->map.numa_node);
+ smap->map.numa_node,
+ account_mem);
if (!smap->elems)
return -ENOMEM;
@@ -90,6 +92,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
u32 value_size = attr->value_size;
struct bpf_stack_map *smap;
u64 cost, n_buckets;
+ bool account_mem = (attr->map_flags & BPF_F_ACCOUNT_MEM);
int err;
if (!capable(CAP_SYS_ADMIN))
@@ -119,7 +122,8 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
if (cost >= U32_MAX - PAGE_SIZE)
return ERR_PTR(-E2BIG);
- smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr));
+ smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr),
+ account_mem);
if (!smap)
return ERR_PTR(-ENOMEM);
@@ -141,7 +145,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
if (err)
goto free_smap;
- err = prealloc_elems_and_freelist(smap);
+ err = prealloc_elems_and_freelist(smap, account_mem);
if (err)
goto put_buffers;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index b155cd17c1bd..13f2e1731a47 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -131,25 +131,29 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
return map;
map->ops = ops;
map->map_type = type;
+ map->account_mem = (attr->map_flags & BPF_F_ACCOUNT_MEM);
return map;
}
-void *bpf_map_area_alloc(size_t size, int numa_node)
+void *bpf_map_area_alloc(size_t size, int numa_node, bool account_mem)
{
/* We definitely need __GFP_NORETRY, so OOM killer doesn't
* trigger under memory pressure as we really just want to
* fail instead.
*/
- const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
+ gfp_t gfp = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
void *area;
+ if (account_mem)
+ gfp |= __GFP_ACCOUNT;
+
if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
- area = kmalloc_node(size, GFP_USER | flags, numa_node);
+ area = kmalloc_node(size, GFP_USER | gfp, numa_node);
if (area != NULL)
return area;
}
- return __vmalloc_node_flags_caller(size, numa_node, GFP_KERNEL | flags,
+ return __vmalloc_node_flags_caller(size, numa_node, GFP_KERNEL | gfp,
__builtin_return_address(0));
}
diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c
index 686d244e798d..bbc1f142326f 100644
--- a/kernel/bpf/xskmap.c
+++ b/kernel/bpf/xskmap.c
@@ -20,6 +20,8 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
int cpu, err = -EINVAL;
struct xsk_map *m;
u64 cost;
+ bool account_mem = (attr->map_flags & BPF_F_ACCOUNT_MEM);
+ gfp_t gfp = GFP_KERNEL;
if (!capable(CAP_NET_ADMIN))
return ERR_PTR(-EPERM);
@@ -49,7 +51,9 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
err = -ENOMEM;
- m->flush_list = alloc_percpu(struct list_head);
+ if (account_mem)
+ gfp |= __GFP_ACCOUNT;
+ m->flush_list = alloc_percpu_gfp(struct list_head, gfp);
if (!m->flush_list)
goto free_m;
@@ -58,7 +62,8 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
m->xsk_map = bpf_map_area_alloc(m->map.max_entries *
sizeof(struct xdp_sock *),
- m->map.numa_node);
+ m->map.numa_node,
+ account_mem);
if (!m->xsk_map)
goto free_percpu;
return &m->map;
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index be6092ac69f8..eefcfd1294c0 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -18,13 +18,15 @@ struct bpf_stab {
raw_spinlock_t lock;
};
-#define SOCK_CREATE_FLAG_MASK \
- (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
+#define SOCK_CREATE_FLAG_MASK \
+ (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY | \
+ BPF_F_ACCOUNT_MEM)
static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
{
struct bpf_stab *stab;
u64 cost;
+ bool account_mem = (attr->map_flags & BPF_F_ACCOUNT_MEM);
int err;
if (!capable(CAP_NET_ADMIN))
@@ -56,7 +58,8 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
stab->sks = bpf_map_area_alloc(stab->map.max_entries *
sizeof(struct sock *),
- stab->map.numa_node);
+ stab->map.numa_node,
+ account_mem);
if (stab->sks)
return &stab->map;
err = -ENOMEM;
@@ -788,6 +791,7 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
struct bpf_htab *htab;
int i, err;
u64 cost;
+ bool account = (attr->map_flags & BPF_F_ACCOUNT_MEM);
if (!capable(CAP_NET_ADMIN))
return ERR_PTR(-EPERM);
@@ -823,7 +827,8 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
htab->buckets = bpf_map_area_alloc(htab->buckets_num *
sizeof(struct bpf_htab_bucket),
- htab->map.numa_node);
+ htab->map.numa_node,
+ account);
if (!htab->buckets) {
err = -ENOMEM;
goto free_htab;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 91c43884f295..dc490e3fdce3 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -278,6 +278,9 @@ enum bpf_attach_type {
#define BPF_F_NO_COMMON_LRU (1U << 1)
/* Specify numa node during map creation */
#define BPF_F_NUMA_NODE (1U << 2)
+/* Enable memory accounting for map */
+#define BPF_F_ACCOUNT_MEM (1U << 7)
+
#define BPF_OBJ_NAME_LEN 16U
--
2.20.1