[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220921170002.29557-6-laoar.shao@gmail.com>
Date: Wed, 21 Sep 2022 16:59:57 +0000
From: Yafang Shao <laoar.shao@...il.com>
To: ast@...nel.org, daniel@...earbox.net, andrii@...nel.org,
kafai@...com, songliubraving@...com, yhs@...com,
john.fastabend@...il.com, kpsingh@...nel.org, sdf@...gle.com,
haoluo@...gle.com, jolsa@...nel.org, hannes@...xchg.org,
mhocko@...nel.org, roman.gushchin@...ux.dev, shakeelb@...gle.com,
songmuchun@...edance.com, akpm@...ux-foundation.org, tj@...nel.org,
lizefan.x@...edance.com
Cc: cgroups@...r.kernel.org, netdev@...r.kernel.org,
bpf@...r.kernel.org, linux-mm@...ck.org,
Yafang Shao <laoar.shao@...il.com>
Subject: [RFC PATCH bpf-next 05/10] bpf: Use scope-based charge in bpf_map_area_alloc
Currently bpf_map_area_alloc() is used to allocate either the container of
a struct bpf_map or members of that container. To distinguish map creation
from the other case, a new struct bpf_map parameter is added to
bpf_map_area_alloc(). Callers allocating the map container itself pass NULL
and keep the existing behaviour. For the non-map-creation case, the memcg
is then obtained from the map instead of using the current memcg.
Signed-off-by: Yafang Shao <laoar.shao@...il.com>
---
include/linux/bpf.h | 2 +-
kernel/bpf/arraymap.c | 2 +-
kernel/bpf/bloom_filter.c | 2 +-
kernel/bpf/bpf_local_storage.c | 2 +-
kernel/bpf/bpf_struct_ops.c | 6 +++---
kernel/bpf/cpumap.c | 5 +++--
kernel/bpf/devmap.c | 13 ++++++++-----
kernel/bpf/hashtab.c | 8 +++++---
kernel/bpf/local_storage.c | 2 +-
kernel/bpf/lpm_trie.c | 2 +-
kernel/bpf/offload.c | 2 +-
kernel/bpf/queue_stack_maps.c | 2 +-
kernel/bpf/reuseport_array.c | 2 +-
kernel/bpf/ringbuf.c | 15 +++++++++------
kernel/bpf/stackmap.c | 5 +++--
kernel/bpf/syscall.c | 16 ++++++++++++++--
net/core/sock_map.c | 10 ++++++----
net/xdp/xskmap.c | 2 +-
18 files changed, 61 insertions(+), 37 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index d64d7a2..eca1502 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1696,7 +1696,7 @@ struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type,
struct bpf_map * __must_check bpf_map_inc_not_zero(struct bpf_map *map);
void bpf_map_put_with_uref(struct bpf_map *map);
void bpf_map_put(struct bpf_map *map);
-void *bpf_map_area_alloc(u64 size, int numa_node);
+void *bpf_map_area_alloc(u64 size, int numa_node, struct bpf_map *map);
void *bpf_map_area_mmapable_alloc(u64 size, int numa_node);
void bpf_map_area_free(void *base, struct bpf_map *map);
bool bpf_map_write_active(const struct bpf_map *map);
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 8cf021e..dd79d0d 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -135,7 +135,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
array = data + PAGE_ALIGN(sizeof(struct bpf_array))
- offsetof(struct bpf_array, value);
} else {
- array = bpf_map_area_alloc(array_size, numa_node);
+ array = bpf_map_area_alloc(array_size, numa_node, NULL);
}
if (!array)
return ERR_PTR(-ENOMEM);
diff --git a/kernel/bpf/bloom_filter.c b/kernel/bpf/bloom_filter.c
index e59064d..6691f79 100644
--- a/kernel/bpf/bloom_filter.c
+++ b/kernel/bpf/bloom_filter.c
@@ -142,7 +142,7 @@ static struct bpf_map *bloom_map_alloc(union bpf_attr *attr)
}
bitset_bytes = roundup(bitset_bytes, sizeof(unsigned long));
- bloom = bpf_map_area_alloc(sizeof(*bloom) + bitset_bytes, numa_node);
+ bloom = bpf_map_area_alloc(sizeof(*bloom) + bitset_bytes, numa_node, NULL);
if (!bloom)
return ERR_PTR(-ENOMEM);
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index 7b68d846..44498d7d 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -610,7 +610,7 @@ struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr)
unsigned int i;
u32 nbuckets;
- smap = bpf_map_area_alloc(sizeof(*smap), NUMA_NO_NODE);
+ smap = bpf_map_area_alloc(sizeof(*smap), NUMA_NO_NODE, NULL);
if (!smap)
return ERR_PTR(-ENOMEM);
bpf_map_init_from_attr(&smap->map, attr);
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 9fb8ad1..37ba5c0 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -618,7 +618,7 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
*/
(vt->size - sizeof(struct bpf_struct_ops_value));
- st_map = bpf_map_area_alloc(st_map_size, NUMA_NO_NODE);
+ st_map = bpf_map_area_alloc(st_map_size, NUMA_NO_NODE, NULL);
if (!st_map)
return ERR_PTR(-ENOMEM);
@@ -626,10 +626,10 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
map = &st_map->map;
bpf_map_init_from_attr(map, attr);
- st_map->uvalue = bpf_map_area_alloc(vt->size, NUMA_NO_NODE);
+ st_map->uvalue = bpf_map_area_alloc(vt->size, NUMA_NO_NODE, map);
st_map->links =
bpf_map_area_alloc(btf_type_vlen(t) * sizeof(struct bpf_links *),
- NUMA_NO_NODE);
+ NUMA_NO_NODE, map);
st_map->image = bpf_jit_alloc_exec(PAGE_SIZE);
if (!st_map->uvalue || !st_map->links || !st_map->image) {
bpf_struct_ops_map_free(map);
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 7de2ae6..b593157 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -97,7 +97,7 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
attr->map_flags & ~BPF_F_NUMA_NODE)
return ERR_PTR(-EINVAL);
- cmap = bpf_map_area_alloc(sizeof(*cmap), NUMA_NO_NODE);
+ cmap = bpf_map_area_alloc(sizeof(*cmap), NUMA_NO_NODE, NULL);
if (!cmap)
return ERR_PTR(-ENOMEM);
@@ -112,7 +112,8 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
/* Alloc array for possible remote "destination" CPUs */
cmap->cpu_map = bpf_map_area_alloc(cmap->map.max_entries *
sizeof(struct bpf_cpu_map_entry *),
- cmap->map.numa_node);
+ cmap->map.numa_node,
+ &cmap->map);
if (!cmap->cpu_map)
goto free_cmap;
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 3268ce7..807a4cd 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -89,12 +89,13 @@ struct bpf_dtab {
static LIST_HEAD(dev_map_list);
static struct hlist_head *dev_map_create_hash(unsigned int entries,
- int numa_node)
+ int numa_node,
+ struct bpf_map *map)
{
int i;
struct hlist_head *hash;
- hash = bpf_map_area_alloc((u64) entries * sizeof(*hash), numa_node);
+ hash = bpf_map_area_alloc((u64) entries * sizeof(*hash), numa_node, map);
if (hash != NULL)
for (i = 0; i < entries; i++)
INIT_HLIST_HEAD(&hash[i]);
@@ -136,7 +137,8 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets,
- dtab->map.numa_node);
+ dtab->map.numa_node,
+ &dtab->map);
if (!dtab->dev_index_head)
return -ENOMEM;
@@ -144,7 +146,8 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
} else {
dtab->netdev_map = bpf_map_area_alloc((u64) dtab->map.max_entries *
sizeof(struct bpf_dtab_netdev *),
- dtab->map.numa_node);
+ dtab->map.numa_node,
+ &dtab->map);
if (!dtab->netdev_map)
return -ENOMEM;
}
@@ -160,7 +163,7 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
if (!capable(CAP_NET_ADMIN))
return ERR_PTR(-EPERM);
- dtab = bpf_map_area_alloc(sizeof(*dtab), NUMA_NO_NODE);
+ dtab = bpf_map_area_alloc(sizeof(*dtab), NUMA_NO_NODE, NULL);
if (!dtab)
return ERR_PTR(-ENOMEM);
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index f542b51..89887df 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -349,7 +349,8 @@ static int prealloc_init(struct bpf_htab *htab)
num_entries += num_possible_cpus();
htab->elems = bpf_map_area_alloc((u64)htab->elem_size * num_entries,
- htab->map.numa_node);
+ htab->map.numa_node,
+ &htab->map);
if (!htab->elems)
return -ENOMEM;
@@ -510,7 +511,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
struct bpf_htab *htab;
int err, i;
- htab = bpf_map_area_alloc(sizeof(*htab), NUMA_NO_NODE);
+ htab = bpf_map_area_alloc(sizeof(*htab), NUMA_NO_NODE, NULL);
if (!htab)
return ERR_PTR(-ENOMEM);
@@ -549,7 +550,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
err = -ENOMEM;
htab->buckets = bpf_map_area_alloc(htab->n_buckets *
sizeof(struct bucket),
- htab->map.numa_node);
+ htab->map.numa_node,
+ &htab->map);
if (!htab->buckets)
goto free_htab;
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index c705d66..fcc7ece 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -313,7 +313,7 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
/* max_entries is not used and enforced to be 0 */
return ERR_PTR(-EINVAL);
- map = bpf_map_area_alloc(sizeof(struct bpf_cgroup_storage_map), numa_node);
+ map = bpf_map_area_alloc(sizeof(struct bpf_cgroup_storage_map), numa_node, NULL);
if (!map)
return ERR_PTR(-ENOMEM);
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index fd99360..3d329ae 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -558,7 +558,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
attr->value_size > LPM_VAL_SIZE_MAX)
return ERR_PTR(-EINVAL);
- trie = bpf_map_area_alloc(sizeof(*trie), NUMA_NO_NODE);
+ trie = bpf_map_area_alloc(sizeof(*trie), NUMA_NO_NODE, NULL);
if (!trie)
return ERR_PTR(-ENOMEM);
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index c9941a9..87c59da 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -372,7 +372,7 @@ struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
attr->map_type != BPF_MAP_TYPE_HASH)
return ERR_PTR(-EINVAL);
- offmap = bpf_map_area_alloc(sizeof(*offmap), NUMA_NO_NODE);
+ offmap = bpf_map_area_alloc(sizeof(*offmap), NUMA_NO_NODE, NULL);
if (!offmap)
return ERR_PTR(-ENOMEM);
diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c
index f2ec0c4..bf57e45 100644
--- a/kernel/bpf/queue_stack_maps.c
+++ b/kernel/bpf/queue_stack_maps.c
@@ -74,7 +74,7 @@ static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)
size = (u64) attr->max_entries + 1;
queue_size = sizeof(*qs) + size * attr->value_size;
- qs = bpf_map_area_alloc(queue_size, numa_node);
+ qs = bpf_map_area_alloc(queue_size, numa_node, NULL);
if (!qs)
return ERR_PTR(-ENOMEM);
diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c
index 3b6d1c7..fc6f6b6 100644
--- a/kernel/bpf/reuseport_array.c
+++ b/kernel/bpf/reuseport_array.c
@@ -155,7 +155,7 @@ static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr)
return ERR_PTR(-EPERM);
/* allocate all map elements and zero-initialize them */
- array = bpf_map_area_alloc(struct_size(array, ptrs, attr->max_entries), numa_node);
+ array = bpf_map_area_alloc(struct_size(array, ptrs, attr->max_entries), numa_node, NULL);
if (!array)
return ERR_PTR(-ENOMEM);
diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c
index 74dd8dc..5eb7820 100644
--- a/kernel/bpf/ringbuf.c
+++ b/kernel/bpf/ringbuf.c
@@ -59,7 +59,8 @@ struct bpf_ringbuf_hdr {
u32 pg_off;
};
-static struct bpf_ringbuf *bpf_ringbuf_area_alloc(size_t data_sz, int numa_node)
+static struct bpf_ringbuf *bpf_ringbuf_area_alloc(size_t data_sz, int numa_node,
+ struct bpf_map *map)
{
const gfp_t flags = GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL |
__GFP_NOWARN | __GFP_ZERO;
@@ -89,7 +90,7 @@ static struct bpf_ringbuf *bpf_ringbuf_area_alloc(size_t data_sz, int numa_node)
* user-space implementations significantly.
*/
array_size = (nr_meta_pages + 2 * nr_data_pages) * sizeof(*pages);
- pages = bpf_map_area_alloc(array_size, numa_node);
+ pages = bpf_map_area_alloc(array_size, numa_node, map);
if (!pages)
return NULL;
@@ -127,11 +128,12 @@ static void bpf_ringbuf_notify(struct irq_work *work)
wake_up_all(&rb->waitq);
}
-static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node)
+static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node,
+ struct bpf_map *map)
{
struct bpf_ringbuf *rb;
- rb = bpf_ringbuf_area_alloc(data_sz, numa_node);
+ rb = bpf_ringbuf_area_alloc(data_sz, numa_node, map);
if (!rb)
return NULL;
@@ -164,13 +166,14 @@ static struct bpf_map *ringbuf_map_alloc(union bpf_attr *attr)
return ERR_PTR(-E2BIG);
#endif
- rb_map = bpf_map_area_alloc(sizeof(*rb_map), NUMA_NO_NODE);
+ rb_map = bpf_map_area_alloc(sizeof(*rb_map), NUMA_NO_NODE, NULL);
if (!rb_map)
return ERR_PTR(-ENOMEM);
bpf_map_init_from_attr(&rb_map->map, attr);
- rb_map->rb = bpf_ringbuf_alloc(attr->max_entries, rb_map->map.numa_node);
+ rb_map->rb = bpf_ringbuf_alloc(attr->max_entries, rb_map->map.numa_node,
+ &rb_map->map);
if (!rb_map->rb) {
bpf_map_area_free(rb_map, &rb_map->map);
return ERR_PTR(-ENOMEM);
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 042b7d2..9440fab 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -49,7 +49,8 @@ static int prealloc_elems_and_freelist(struct bpf_stack_map *smap)
int err;
smap->elems = bpf_map_area_alloc(elem_size * smap->map.max_entries,
- smap->map.numa_node);
+ smap->map.numa_node,
+ &smap->map);
if (!smap->elems)
return -ENOMEM;
@@ -100,7 +101,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
return ERR_PTR(-E2BIG);
cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap);
- smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr));
+ smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr), NULL);
if (!smap)
return ERR_PTR(-ENOMEM);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 29ad913..727c04c 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -362,9 +362,21 @@ static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable)
flags, numa_node, __builtin_return_address(0));
}
-void *bpf_map_area_alloc(u64 size, int numa_node)
+void *bpf_map_area_alloc(u64 size, int numa_node, struct bpf_map *map)
{
- return __bpf_map_area_alloc(size, numa_node, false);
+ struct mem_cgroup *memcg, *old_memcg;
+ void *ptr;
+
+ if (!map)
+ return __bpf_map_area_alloc(size, numa_node, false);
+
+ memcg = bpf_map_get_memcg(map);
+ old_memcg = set_active_memcg(memcg);
+ ptr = __bpf_map_area_alloc(size, numa_node, false);
+ set_active_memcg(old_memcg);
+ bpf_map_put_memcg(memcg);
+
+ return ptr;
}
void *bpf_map_area_mmapable_alloc(u64 size, int numa_node)
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 8da9fd4..25a5ac4 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -41,7 +41,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
attr->map_flags & ~SOCK_CREATE_FLAG_MASK)
return ERR_PTR(-EINVAL);
- stab = bpf_map_area_alloc(sizeof(*stab), NUMA_NO_NODE);
+ stab = bpf_map_area_alloc(sizeof(*stab), NUMA_NO_NODE, NULL);
if (!stab)
return ERR_PTR(-ENOMEM);
@@ -50,7 +50,8 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
stab->sks = bpf_map_area_alloc((u64) stab->map.max_entries *
sizeof(struct sock *),
- stab->map.numa_node);
+ stab->map.numa_node,
+ &stab->map);
if (!stab->sks) {
bpf_map_area_free(stab, &stab->map);
return ERR_PTR(-ENOMEM);
@@ -1085,7 +1086,7 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
if (attr->key_size > MAX_BPF_STACK)
return ERR_PTR(-E2BIG);
- htab = bpf_map_area_alloc(sizeof(*htab), NUMA_NO_NODE);
+ htab = bpf_map_area_alloc(sizeof(*htab), NUMA_NO_NODE, NULL);
if (!htab)
return ERR_PTR(-ENOMEM);
@@ -1102,7 +1103,8 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
htab->buckets = bpf_map_area_alloc(htab->buckets_num *
sizeof(struct bpf_shtab_bucket),
- htab->map.numa_node);
+ htab->map.numa_node,
+ &htab->map);
if (!htab->buckets) {
err = -ENOMEM;
goto free_htab;
diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c
index 5abb87e..beb11fd 100644
--- a/net/xdp/xskmap.c
+++ b/net/xdp/xskmap.c
@@ -75,7 +75,7 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
numa_node = bpf_map_attr_numa_node(attr);
size = struct_size(m, xsk_map, attr->max_entries);
- m = bpf_map_area_alloc(size, numa_node);
+ m = bpf_map_area_alloc(size, numa_node, NULL);
if (!m)
return ERR_PTR(-ENOMEM);
--
1.8.3.1
Powered by blists - more mailing lists