[<prev] [next>] [day] [month] [year] [list]
Message-id: <508E0C57.2050704@samsung.com>
Date: Mon, 29 Oct 2012 08:55:51 +0400
From: Alexey Perevalov <a.perevalov@...sung.com>
To: netdev@...r.kernel.org
Subject: [PATCH net-next] net_cls: traffic counter based on classification
control cgroup
Hello.
First of all, It's request for comments.
I want to suggest a patch for counting ingress and egress traffic for
applications placed in a net_cls control group.
It's based on res_counters and holds a counter per network interface.
It may be too complex, and it should be split up:
I moved the menu entry for "Control Group Classifier" from Networking/QoS to
General Options/Control Groups.
I don't like the many #ifdefs I introduced in tcp.c and udp.c.
Also there is a problem when the builtin module is loaded before the network
device modules: since I initialize iface.usage_in_bytes in the init function,
I plan to register the cgroup files instead in a callback invoked at
network device registration time.
Here is entire patch:
From b13afb5b7f09e2a858a56ef5f9dfe7e12c4e8501 Mon Sep 17 00:00:00 2001
From: Alexey Perevalov <a.perevalov@...sung.com>
Date: Fri, 26 Oct 2012 17:45:44 +0400
Subject: Traffic statistics based on packet classification control group
---
include/net/cls_cgroup.h | 203
++++++++++++++++++++++++++++++++++----
include/net/cls_counter_holder.h | 26 +++++
init/Kconfig | 25 +++++
kernel/res_counter.c | 4 +
net/core/dev.c | 6 ++
net/ipv4/tcp.c | 27 ++++-
net/ipv4/udp.c | 6 ++
net/sched/Kconfig | 11 ---
net/sched/Makefile | 1 +
net/sched/cls_cgroup.c | 142 +++++++++++++++++++++++++-
net/sched/cls_counter_holder.c | 145 +++++++++++++++++++++++++++
11 files changed, 561 insertions(+), 35 deletions(-)
create mode 100644 include/net/cls_counter_holder.h
create mode 100644 net/sched/cls_counter_holder.c
diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h
index 2581638..3a6954f 100644
--- a/include/net/cls_cgroup.h
+++ b/include/net/cls_cgroup.h
@@ -17,50 +17,198 @@
#include <linux/hardirq.h>
#include <linux/rcupdate.h>
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+#include <linux/nsproxy.h>
+#include <linux/res_counter.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <net/cls_counter_holder.h>
+#include <net/sock.h>
+
+/*TODO hide all it to separate file*/
+
+struct cls_iface_cntrs {
+ char *dev_name;
+ struct res_counter snd_counter;
+ struct res_counter rcv_counter;
+ struct list_head link;
+};
+
+#endif /*CONFIG_NET_CLS_COUNTER*/
+
+
#if IS_ENABLED(CONFIG_NET_CLS_CGROUP)
struct cgroup_cls_state
{
struct cgroup_subsys_state css;
u32 classid;
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+ struct cls_iface_cntrs iface_stats;
+#endif /*CONFIG_NET_CLS_COUNTER*/
};
extern void sock_update_classid(struct sock *sk, struct task_struct
*task);
-#if IS_BUILTIN(CONFIG_NET_CLS_CGROUP)
-static inline u32 task_cls_classid(struct task_struct *p)
+#if IS_MODULE(CONFIG_NET_CLS_CGROUP)
+static inline struct cgroup_cls_state *get_cls_cgroup(struct
task_struct *p)
{
- u32 classid;
+ struct cgroup_subsys_state *css = task_subsys_state(p,
+ net_cls_subsys_id);
+ if (css)
+ return container_of(css,
+ struct cgroup_cls_state, css);
+ return NULL;
+}
+#elif IS_BUILTIN(CONFIG_NET_CLS_CGROUP)
+static inline struct cgroup_cls_state *get_cls_cgroup(struct
task_struct *p)
+{
+ return container_of(task_subsys_state(p, net_cls_subsys_id),
+ struct cgroup_cls_state, css);
+}
+#endif
- if (in_interrupt())
- return 0;
- rcu_read_lock();
- classid = container_of(task_subsys_state(p, net_cls_subsys_id),
- struct cgroup_cls_state, css)->classid;
- rcu_read_unlock();
+#endif /*CONFIG_NET_CLS_CGROUP*/
- return classid;
+#if IS_ENABLED(CONFIG_NET_CLS_CGROUP)
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+static inline u32 skb_cls_classid(const struct sk_buff *skb)
+{
+ return (skb && skb->sk) ? skb->sk->sk_classid : 0;
+}
+
+static inline int get_ifindex_from_skb(const struct sk_buff *skb)
+{
+ int ifindex = 0;
+ if (skb)
+ ifindex = skb->skb_iif;
+ return ifindex;
+}
+
+static struct cls_iface_cntrs *find_cls_counter(
+ struct cgroup_cls_state *cls_cgroup,
+ const char *dev_name,
+ bool create)
+{
+ /*TODO Add lock*/
+ struct cls_iface_cntrs *entry = NULL;
+
+ if (!dev_name) {
+ pr_err("cls please provide valid dev name");
+ return NULL;
+ }
+
+ list_for_each_entry(entry, &cls_cgroup->iface_stats.link, link)
+ if (!strcmp(entry->dev_name, dev_name))
+ return entry;
+
+ if (!create)
+ return entry;
+
+ /*not found, insert*/
+ entry = kmalloc(sizeof(struct cls_iface_cntrs), GFP_ATOMIC);
+ entry->dev_name = kstrdup(dev_name, GFP_ATOMIC);
+ memset(&entry->rcv_counter, 0, sizeof(struct res_counter));
+ memset(&entry->snd_counter, 0, sizeof(struct res_counter));
+ res_counter_init(&entry->rcv_counter, NULL);
+ res_counter_init(&entry->snd_counter, NULL);
+ list_add_tail(&entry->link, &cls_cgroup->iface_stats.link);
+ return entry;
}
-#elif IS_MODULE(CONFIG_NET_CLS_CGROUP)
+
+static void charge_net_cls_snd(struct cgroup_cls_state *cls_cgroup,
+ const u32 copied, const char *dev_name)
+{
+ struct res_counter *fail_res;
+ int res;
+ struct cls_iface_cntrs *cnt = find_cls_counter(cls_cgroup,
+ dev_name, true);
+
+ if (!cnt)
+ return;
+
+ res = res_counter_charge(&cnt->snd_counter, copied, &fail_res);
+}
+
+static char *get_dev_name(const int ifindex)
+{
+ struct net *net = NULL;
+ struct nsproxy *nsproxy = NULL;
+ struct net_device *net_dev = NULL;
+
+ nsproxy = task_nsproxy(current);
+ if (!nsproxy) {
+ pr_debug("cls cant find task_nsproxy");
+ return NULL;
+ }
+
+ net = get_net(nsproxy->net_ns);
+ if (!net) {
+ pr_debug("cls cant find net");
+ return NULL;
+ }
+ net_dev = dev_get_by_index(net, ifindex);
+
+ return net_dev ? net_dev->name : NULL;
+}
+
+static void charge_net_cls_rcv(struct cgroup_cls_state *cls_cgroup,
+ const u32 copied, const int ifindex)
+{
+ char *dev_name = get_dev_name(ifindex);
+ struct res_counter *fail_res;
+ int res;
+ struct cls_iface_cntrs *cnt = find_cls_counter(cls_cgroup,
+ dev_name, true);
+
+ if (!cnt)
+ return;
+
+ res = res_counter_charge(&cnt->rcv_counter, copied, &fail_res);
+}
+
+static inline void count_cls_rcv(struct task_struct *p, const u32
copied, const int ifindex)
+{
+ struct cgroup_cls_state *cls_cgroup;
+
+ cls_cgroup = get_cls_cgroup(p);
+
+ if (cls_cgroup)
+ charge_net_cls_rcv(cls_cgroup, copied, ifindex);
+}
+
+static inline void count_cls_snd(u32 classid, const u32 copied,
+ const char *dev_name)
+{
+ struct cgroup_cls_state *cls_cgroup;
+
+ cls_cgroup = find_cls_cgroup_by_classid(classid);
+
+ if (cls_cgroup)
+ charge_net_cls_snd(cls_cgroup, copied, dev_name);
+}
+#endif /*CONFIG_NET_CLS_COUNTER*/
+
static inline u32 task_cls_classid(struct task_struct *p)
{
- struct cgroup_subsys_state *css;
- u32 classid = 0;
+ int classid = 0;
+ struct cgroup_cls_state *cls_cgroup = NULL;
if (in_interrupt())
return 0;
rcu_read_lock();
- css = task_subsys_state(p, net_cls_subsys_id);
- if (css)
- classid = container_of(css,
- struct cgroup_cls_state, css)->classid;
+
+ cls_cgroup = get_cls_cgroup(p);
+ if (cls_cgroup)
+ classid = cls_cgroup->classid;
+
rcu_read_unlock();
return classid;
}
-#endif
-#else /* !CGROUP_NET_CLS_CGROUP */
+
+#else /* !CONFIG_NET_CLS_CGROUP */
static inline void sock_update_classid(struct sock *sk, struct
task_struct *task)
{
}
@@ -69,5 +217,20 @@ static inline u32 task_cls_classid(struct
task_struct *p)
{
return 0;
}
-#endif /* CGROUP_NET_CLS_CGROUP */
+#endif /* CONFIG_NET_CLS_CGROUP */
+
+#if !IS_ENABLED(CONFIG_NET_CLS_CGROUP) ||
!IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+static inline void count_cls_rcv(struct task_struct *p, const u32
copied, const int ifindex)
+{
+}
+
+static inline void count_cls_snd(u32 classid, const u32 copied, const
char *dev_name)
+{
+}
+
+static inline u32 skb_cls_classid(const struct sk_buff *skb)
+{
+ return 0;
+}
+#endif
#endif /* _NET_CLS_CGROUP_H */
diff --git a/include/net/cls_counter_holder.h
b/include/net/cls_counter_holder.h
new file mode 100644
index 0000000..a129baa
--- /dev/null
+++ b/include/net/cls_counter_holder.h
@@ -0,0 +1,26 @@
+/*
+ * cls_counter_holder.c Interface for holding references of the
+ * net cls cgroup instances.
+ *
+ * Authors: Alexey Perevalov, <a.perevalov@...sung.com>
+ *
+ * Changes:
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _NET_CLS_COUNTER_HOLDER_H_
+#define _NET_CLS_COUNTER_HOLDER_H_
+
+#include <net/cls_cgroup.h>
+
+struct cgroup_cls_state;
+
+void insert_cls_cgroup_entry(struct cgroup_cls_state *obj);
+void delete_cls_cgroup_entry(const u32 classid);
+struct cgroup_cls_state *find_cls_cgroup_by_classid(const u32 classid);
+
+
+#endif /* _NET_CLS_COUNTER_HOLDER_H_ */
diff --git a/init/Kconfig b/init/Kconfig
index 6fdd6e3..2e6af85 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -841,6 +841,31 @@ config CGROUP_HUGETLB
control group is tracked in the third page lru pointer. This means
that we cannot use the controller with huge page less than 3 pages.
+menuconfig NET_CLS_CGROUP
+ tristate "Control Group Classifier"
+ select NET_CLS
+ depends on CGROUPS
+ ---help---
+ Say Y here if you want to classify packets based on the control
+ cgroup of their process.
+
+ To compile this code as a module, choose M here: the
+ module will be called cls_cgroup.
+
+if NET_CLS_CGROUP
+config NET_CLS_COUNTER
+ bool "Network traffic counter for network Control Group Classifier"
+ select NET_CLS
+ default n
+ depends on NET_CLS_CGROUP && RESOURCE_COUNTERS
+ ---help---
+ Say Y here if you want to count traffic associate with the control
+ cgroup.
+
+ To add functionality to cls_cgroup select y.
+
+endif #NET_CLS_CGROUP
+
config CGROUP_PERF
bool "Enable perf_event per-cpu per-container group (cgroup)
monitoring"
depends on PERF_EVENTS && CGROUPS
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index ad581aa..f5767af 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -13,6 +13,8 @@
#include <linux/res_counter.h>
#include <linux/uaccess.h>
#include <linux/mm.h>
+#include <linux/export.h>
+
void res_counter_init(struct res_counter *counter, struct res_counter
*parent)
{
@@ -21,6 +23,7 @@ void res_counter_init(struct res_counter *counter,
struct res_counter *parent)
counter->soft_limit = RESOURCE_MAX;
counter->parent = parent;
}
+EXPORT_SYMBOL(res_counter_init);
int res_counter_charge_locked(struct res_counter *counter, unsigned
long val,
bool force)
@@ -170,6 +173,7 @@ u64 res_counter_read_u64(struct res_counter
*counter, int member)
return *res_counter_member(counter, member);
}
#endif
+EXPORT_SYMBOL(res_counter_read_u64);
int res_counter_memparse_write_strategy(const char *buf,
unsigned long long *res)
diff --git a/net/core/dev.c b/net/core/dev.c
index b4978e2..61c9a61 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -135,6 +135,7 @@
#include <linux/net_tstamp.h>
#include <linux/static_key.h>
#include <net/flow_keys.h>
+#include <net/cls_cgroup.h>
#include "net-sysfs.h"
@@ -2570,6 +2571,11 @@ int dev_queue_xmit(struct sk_buff *skb)
*/
rcu_read_lock_bh();
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+ if (dev)
+ count_cls_snd(skb_cls_classid(skb), skb->len, dev->name);
+#endif
+
skb_update_prio(skb);
txq = netdev_pick_tx(dev, skb);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index eace049..3013509 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -276,6 +276,7 @@
#include <net/ip.h>
#include <net/netdma.h>
#include <net/sock.h>
+#include <net/cls_cgroup.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
@@ -1467,6 +1468,9 @@ int tcp_read_sock(struct sock *sk,
read_descriptor_t *desc,
u32 seq = tp->copied_seq;
u32 offset;
int copied = 0;
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+ int ifindex = 0;
+#endif
if (sk->sk_state == TCP_LISTEN)
return -ENOTCONN;
@@ -1509,6 +1513,9 @@ int tcp_read_sock(struct sock *sk,
read_descriptor_t *desc,
++seq;
break;
}
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+ ifindex = get_ifindex_from_skb(skb);
+#endif
sk_eat_skb(sk, skb, false);
if (!desc->count)
break;
@@ -1519,8 +1526,12 @@ int tcp_read_sock(struct sock *sk,
read_descriptor_t *desc,
tcp_rcv_space_adjust(sk);
/* Clean up data we have read: This will do ACK frames. */
- if (copied > 0)
+ if (copied > 0) {
tcp_cleanup_rbuf(sk, copied);
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+ count_cls_rcv(current, copied, ifindex);
+#endif
+ }
return copied;
}
EXPORT_SYMBOL(tcp_read_sock);
@@ -1548,6 +1559,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock
*sk, struct msghdr *msg,
bool copied_early = false;
struct sk_buff *skb;
u32 urg_hole = 0;
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+ int ifindex = 0;
+#endif
lock_sock(sk);
@@ -1872,6 +1886,9 @@ skip_copy:
if (tcp_hdr(skb)->fin)
goto found_fin_ok;
if (!(flags & MSG_PEEK)) {
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+ ifindex = get_ifindex_from_skb(skb);
+#endif
sk_eat_skb(sk, skb, copied_early);
copied_early = false;
}
@@ -1881,6 +1898,9 @@ skip_copy:
/* Process the FIN. */
++*seq;
if (!(flags & MSG_PEEK)) {
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+ ifindex = get_ifindex_from_skb(skb);
+#endif
sk_eat_skb(sk, skb, copied_early);
copied_early = false;
}
@@ -1923,6 +1943,11 @@ skip_copy:
/* Clean up data we have read: This will do ACK frames. */
tcp_cleanup_rbuf(sk, copied);
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+ if (copied > 0)
+ count_cls_rcv(current, copied, ifindex);
+#endif
+
release_sock(sk);
return copied;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 79c8dbe..a143629 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -101,6 +101,7 @@
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
+#include <net/cls_cgroup.h>
#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/route.h>
@@ -1254,6 +1255,11 @@ try_again:
if (flags & MSG_TRUNC)
err = ulen;
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+ if (ulen > 0)
+ count_cls_rcv(current, ulen, get_ifindex_from_skb(skb));
+#endif
+
out_free:
skb_free_datagram_locked(sk, skb);
out:
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 62fb51f..926dedf 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -418,17 +418,6 @@ config NET_CLS_FLOW
To compile this code as a module, choose M here: the
module will be called cls_flow.
-config NET_CLS_CGROUP
- tristate "Control Group Classifier"
- select NET_CLS
- depends on CGROUPS
- ---help---
- Say Y here if you want to classify packets based on the control
- cgroup of their process.
-
- To compile this code as a module, choose M here: the
- module will be called cls_cgroup.
-
config NET_EMATCH
bool "Extended Matches"
select NET_CLS
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 978cbf0..95dbb12 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -49,6 +49,7 @@ obj-$(CONFIG_NET_CLS_RSVP6) += cls_rsvp6.o
obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o
obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o
obj-$(CONFIG_NET_CLS_CGROUP) += cls_cgroup.o
+obj-$(CONFIG_NET_CLS_COUNTER) += cls_counter_holder.o
obj-$(CONFIG_NET_EMATCH) += ematch.o
obj-$(CONFIG_NET_EMATCH_CMP) += em_cmp.o
obj-$(CONFIG_NET_EMATCH_NBYTE) += em_nbyte.o
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 709b0fb..dcf7f60 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -22,6 +22,11 @@
#include <net/pkt_cls.h>
#include <net/sock.h>
#include <net/cls_cgroup.h>
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+#include <linux/rbtree.h>
+#include <linux/res_counter.h>
+#include <net/cls_counter_holder.h>
+#endif
static inline struct cgroup_cls_state *cgrp_cls_state(struct cgroup *cgrp)
{
@@ -46,11 +51,49 @@ static struct cgroup_subsys_state
*cgrp_create(struct cgroup *cgrp)
if (cgrp->parent)
cs->classid = cgrp_cls_state(cgrp->parent)->classid;
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+ res_counter_init(&cs->iface_stats.snd_counter, NULL);
+ res_counter_init(&cs->iface_stats.rcv_counter, NULL);
+ cs->iface_stats.dev_name = 0;
+ INIT_LIST_HEAD(&cs->iface_stats.link);
+#endif
+
return &cs->css;
}
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+static struct cftype *cft_first;
+
+static inline void cgrp_counter_destroy(struct cgroup_cls_state *cs)
+{
+ struct list_head *pos, *q;
+ delete_cls_cgroup_entry(cs->classid);
+
+ list_for_each_safe(pos, q, &cs->iface_stats.link) {
+ struct cls_iface_cntrs *tmp = list_entry(
+ pos, struct cls_iface_cntrs, link);
+ list_del(pos);
+ if (!tmp)
+ continue;
+
+ if (!tmp->dev_name)
+ kfree(tmp->dev_name);
+ kfree(tmp);
+ }
+
+}
+#endif
+
static void cgrp_destroy(struct cgroup *cgrp)
{
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+
+ struct cgroup_cls_state *cs = cgrp_cls_state(cgrp);
+
+ if (!cs)
+ return;
+ cgrp_counter_destroy(cs);
+#endif
kfree(cgrp_cls_state(cgrp));
}
@@ -81,9 +124,59 @@ static u64 read_classid(struct cgroup *cgrp, struct
cftype *cft)
return cgrp_cls_state(cgrp)->classid;
}
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+static const char *rcv_label = "rcv:";
+static const char *snd_label = "snd:";
+
+static const char *extract_dev_name(const char *cgroup_file_name)
+{
+ const char *dot = strchr(cgroup_file_name, '.');
+ const size_t len = dot ? dot - cgroup_file_name :
strlen(cgroup_file_name);
+
+ return kstrndup(cgroup_file_name, len, GFP_KERNEL);
+}
+
+static int read_stat(struct cgroup *cgrp, struct cftype *cft,
+ struct cgroup_map_cb *cb)
+{
+ struct cgroup_cls_state *cs = cgrp_cls_state(cgrp);
+ const char *dev_name = extract_dev_name(cft->name);
+ struct cls_iface_cntrs *res = find_cls_counter(cs, dev_name, false);
+
+ if (!res) {
+ pr_debug("cls cant read for cls");
+ return -EINVAL;
+ }
+
+ cb->fill(cb, rcv_label,
+ res_counter_read_u64(&res->rcv_counter, RES_USAGE));
+ cb->fill(cb, snd_label,
+ res_counter_read_u64(&res->snd_counter, RES_USAGE));
+
+ kfree(dev_name);
+ return 0;
+}
+#endif /*CONFIG_NET_CLS_COUNTER*/
+
static int write_classid(struct cgroup *cgrp, struct cftype *cft, u64
value)
{
- cgrp_cls_state(cgrp)->classid = (u32) value;
+ struct cgroup_cls_state *cgrp_cls = cgrp_cls_state(cgrp);
+ u32 *classid = &cgrp_cls->classid;
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+ u32 oldclassid = *classid;
+
+ if(find_cls_cgroup_by_classid(value)) {
+ pr_err("cls: classid %llu already exists\n", value);
+ return -EINVAL;
+ }
+
+ insert_cls_cgroup_entry(cgrp_cls);
+
+ if (oldclassid)
+ delete_cls_cgroup_entry(oldclassid);
+#endif /*CONFIG_NET_CLS_COUNTER*/
+ *classid = (u32) value;
+
return 0;
}
@@ -307,17 +400,57 @@ static struct tcf_proto_ops cls_cgroup_ops
__read_mostly = {
.owner = THIS_MODULE,
};
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+static inline int init_cgroup_counter(void)
+{
+ int dev_count = 0, ret = 0;
+ struct net_device *dev;
+ struct cftype *cft;
+
+ for_each_netdev(&init_net, dev)
+ ++dev_count;
+
+ cft = kmalloc(sizeof(struct cftype) * (dev_count + 1), GFP_KERNEL);
+ memset(cft, 0, sizeof(struct cftype) * (dev_count + 1));
+
+ cft_first = cft;
+
+ for_each_netdev(&init_net, dev) {
+ snprintf(cft->name, MAX_CFTYPE_NAME,
+ "%s.usage_in_bytes", dev->name);
+ cft->read_map = read_stat;
+ cft->private = RES_USAGE;
+ ++cft;
+ }
+/*terminate element*/
+ ret = cgroup_add_cftypes(&net_cls_subsys, cft_first);
+ if (ret)
+ pr_err("error adding cft for counting at cls_cgroup %d\n", ret);
+
+ return ret;
+}
+#endif
+
static int __init init_cgroup_cls(void)
{
int ret;
-
ret = cgroup_load_subsys(&net_cls_subsys);
if (ret)
goto out;
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+ ret = init_cgroup_counter();
+ if (ret)
+ goto unload;
+#endif
+
ret = register_tcf_proto_ops(&cls_cgroup_ops);
if (ret)
- cgroup_unload_subsys(&net_cls_subsys);
+ goto unload;
+
+ return 0;
+unload:
+ cgroup_unload_subsys(&net_cls_subsys);
out:
return ret;
@@ -328,6 +461,9 @@ static void __exit exit_cgroup_cls(void)
unregister_tcf_proto_ops(&cls_cgroup_ops);
cgroup_unload_subsys(&net_cls_subsys);
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+ kfree(cft_first);
+#endif
}
module_init(init_cgroup_cls);
diff --git a/net/sched/cls_counter_holder.c b/net/sched/cls_counter_holder.c
new file mode 100644
index 0000000..eb56298
--- /dev/null
+++ b/net/sched/cls_counter_holder.c
@@ -0,0 +1,145 @@
+/*
+ * net/sched/cls_counter_holder.c Interface for holding references of the
+ * net cls cgroup instances.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Alexey Perevalov <a.perevalov@...sung.com>
+ */
+
+
+#include <linux/export.h>
+#include <linux/module.h>
+#include <net/cls_cgroup.h>
+#include <net/cls_counter_holder.h>
+
+static struct rb_root classid_tree = RB_ROOT;
+static DEFINE_SPINLOCK(classid_tree_lock);
+
+struct entry {
+ struct cgroup_cls_state *data;
+ struct rb_node node;
+};
+
+static struct entry *find_entry(struct rb_root *root, const u32 classid)
+{
+ struct rb_node *node = root->rb_node;
+
+ while (node) {
+ struct entry *cls_entry = rb_entry(node, struct entry, node);
+ int result = 0;
+ if (!cls_entry || !cls_entry->data)
+ break;
+ result = cls_entry->data->classid - classid;
+
+ if (result < 0)
+ node = node->rb_left;
+ else if (result > 0)
+ node = node->rb_right;
+ else
+ return cls_entry;
+ }
+ return NULL;
+}
+
+void insert_cls_cgroup_entry(struct cgroup_cls_state *obj)
+{
+ struct rb_node **new;
+ struct rb_node *parent = NULL;
+ struct entry *new_entry;
+ unsigned long irq_flags = 0;
+
+ struct rb_root *root = &classid_tree;
+
+ spin_lock_irqsave(&classid_tree_lock, irq_flags);
+
+ new = &root->rb_node;
+
+ while (*new) {
+ struct entry *this = rb_entry(*new, struct entry, node);
+ /* Sort by classid, then by ifindex */
+ int result =
+ (this->data->classid - obj->classid);
+ parent = *new;
+ if (result < 0)
+ new = &((*new)->rb_left);
+ else if (result > 0)
+ new = &((*new)->rb_right);
+ else
+ goto unlock;
+ }
+
+ /* If we here, we need to insert new entry into tree */
+ new_entry = kmalloc(sizeof(struct entry), GFP_ATOMIC);
+ if (!new_entry)
+ goto unlock;
+
+ new_entry->data = obj;
+ /* Add new node and rebalance tree */
+ rb_link_node(&new_entry->node, parent, new);
+ rb_insert_color(&new_entry->node, root);
+
+unlock:
+ spin_unlock_irqrestore(&classid_tree_lock, irq_flags);
+}
+EXPORT_SYMBOL(insert_cls_cgroup_entry);
+
+void delete_cls_cgroup_entry(const u32 classid)
+{
+ unsigned long irq_flags = 0;
+ struct entry *data = NULL;
+ struct rb_root *root = &classid_tree;
+ spin_lock_irqsave(&classid_tree_lock, irq_flags);
+
+ data = find_entry(root, classid);
+
+ if (data) {
+ rb_erase(&data->node, root);
+ kfree(data);
+ }
+ spin_unlock_irqrestore(&classid_tree_lock, irq_flags);
+}
+EXPORT_SYMBOL(delete_cls_cgroup_entry);
+
+static void free_node(struct rb_node *root)
+{
+ struct entry *cur_entry = rb_entry(root, struct entry, node);
+ if (root->rb_left)
+ free_node(root->rb_left);
+ if (root->rb_right)
+ free_node(root->rb_right);
+ if (cur_entry)
+ kfree(cur_entry);
+}
+
+static void free_classid_tree(void)
+{
+ unsigned long irq_flags = 0;
+
+ spin_lock_irqsave(&classid_tree_lock, irq_flags);
+
+ free_node(classid_tree.rb_node);
+
+ spin_unlock_irqrestore(&classid_tree_lock, irq_flags);
+}
+
+struct cgroup_cls_state *find_cls_cgroup_by_classid(const u32 classid)
+{
+ struct entry *cls_entry = find_entry(&classid_tree, classid);
+ if (cls_entry)
+ return cls_entry->data;
+
+ return NULL;
+}
+EXPORT_SYMBOL(find_cls_cgroup_by_classid);
+
+static void __exit exit_cls_counter_holder(void)
+{
+ free_classid_tree();
+}
+
+module_exit(exit_cls_counter_holder);
+MODULE_LICENSE("GPL");
--
1.7.9.5
--
Best regards,
Alexey Perevalov
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists