[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1386917330.19078.141.camel@edumazet-glaptop2.roam.corp.google.com>
Date: Thu, 12 Dec 2013 22:48:50 -0800
From: Eric Dumazet <eric.dumazet@...il.com>
To: Cong Wang <xiyou.wangcong@...il.com>
Cc: netdev@...r.kernel.org, Jamal Hadi Salim <jhs@...atatu.com>,
"David S. Miller" <davem@...emloft.net>
Subject: Re: [PATCH net-next v3 6/6] net_sched: convert tcf_hashinfo to
hlist and use rcu
On Thu, 2013-12-12 at 21:47 -0800, Cong Wang wrote:
> So that we don't need to play with singly linked list
> and of course we can use RCU+spinlock instead of rwlock
> now.
Just replace the rwlock with a spinlock — no need for RCU?
> Cc: Jamal Hadi Salim <jhs@...atatu.com>
> Cc: David S. Miller <davem@...emloft.net>
> Signed-off-by: Cong Wang <xiyou.wangcong@...il.com>
> ---
> include/net/act_api.h | 16 +++++++-----
> net/sched/act_api.c | 71 +++++++++++++++++++++++---------------------------
> net/sched/act_police.c | 47 ++++++++++++++-------------------
> 3 files changed, 62 insertions(+), 72 deletions(-)
>
> diff --git a/include/net/act_api.h b/include/net/act_api.h
> index 2678b67..18af68e 100644
> --- a/include/net/act_api.h
> +++ b/include/net/act_api.h
> @@ -9,7 +9,7 @@
> #include <net/pkt_sched.h>
>
> struct tcf_common {
> - struct tcf_common *tcfc_next;
> + struct hlist_node tcfc_head;
> u32 tcfc_index;
> int tcfc_refcnt;
> int tcfc_bindcnt;
> @@ -22,7 +22,7 @@ struct tcf_common {
> spinlock_t tcfc_lock;
> struct rcu_head tcfc_rcu;
> };
> -#define tcf_next common.tcfc_next
> +#define tcf_head common.tcfc_head
> #define tcf_index common.tcfc_index
> #define tcf_refcnt common.tcfc_refcnt
> #define tcf_bindcnt common.tcfc_bindcnt
> @@ -36,9 +36,9 @@ struct tcf_common {
> #define tcf_rcu common.tcfc_rcu
>
> struct tcf_hashinfo {
> - struct tcf_common **htab;
> + struct hlist_head *htab;
> unsigned int hmask;
> - rwlock_t lock;
> + spinlock_t lock;
> };
>
> static inline unsigned int tcf_hash(u32 index, unsigned int hmask)
> @@ -48,12 +48,16 @@ static inline unsigned int tcf_hash(u32 index, unsigned int hmask)
>
> static inline int tcf_hashinfo_init(struct tcf_hashinfo *hf, unsigned int mask)
> {
> - rwlock_init(&hf->lock);
> + int i;
> +
> + spin_lock_init(&hf->lock);
> hf->hmask = mask;
> - hf->htab = kzalloc((mask + 1) * sizeof(struct tcf_common *),
> + hf->htab = kzalloc((mask + 1) * sizeof(struct hlist_head),
> GFP_KERNEL);
> if (!hf->htab)
> return -ENOMEM;
> + for (i = 0; i < mask + 1; i++)
> + INIT_HLIST_HEAD(&hf->htab[i]);
> return 0;
> }
>
> diff --git a/net/sched/act_api.c b/net/sched/act_api.c
> index 2641a8b..974e8f7 100644
> --- a/net/sched/act_api.c
> +++ b/net/sched/act_api.c
> @@ -29,25 +29,17 @@
>
> void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
> {
> - unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask);
> - struct tcf_common **p1p;
> -
> - for (p1p = &hinfo->htab[h]; *p1p; p1p = &(*p1p)->tcfc_next) {
> - if (*p1p == p) {
> - write_lock_bh(&hinfo->lock);
> - *p1p = p->tcfc_next;
> - write_unlock_bh(&hinfo->lock);
> - gen_kill_estimator(&p->tcfc_bstats,
> - &p->tcfc_rate_est);
> - /*
> - * gen_estimator est_timer() might access p->tcfc_lock
> - * or bstats, wait a RCU grace period before freeing p
> - */
> - kfree_rcu(p, tcfc_rcu);
> - return;
> - }
> - }
> - WARN_ON(1);
> + spin_lock_bh(&hinfo->lock);
> + hlist_del_rcu(&p->tcfc_head);
> + spin_unlock_bh(&hinfo->lock);
> + synchronize_rcu();
Why is this needed?
> + gen_kill_estimator(&p->tcfc_bstats,
> + &p->tcfc_rate_est);
> + /*
> + * gen_estimator est_timer() might access p->tcfc_lock
> + * or bstats, wait a RCU grace period before freeing p
> + */
> + kfree_rcu(p, tcfc_rcu);
> }
> EXPORT_SYMBOL(tcf_hash_destroy);
>
> @@ -73,18 +65,19 @@ EXPORT_SYMBOL(tcf_hash_release);
> static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
> struct tc_action *a, struct tcf_hashinfo *hinfo)
> {
> + struct hlist_head *head;
> struct tcf_common *p;
> int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
> struct nlattr *nest;
>
> - read_lock_bh(&hinfo->lock);
> + rcu_read_lock_bh();
>
> s_i = cb->args[0];
>
> for (i = 0; i < (hinfo->hmask + 1); i++) {
> - p = hinfo->htab[tcf_hash(i, hinfo->hmask)];
> + head = &hinfo->htab[tcf_hash(i, hinfo->hmask)];
>
> - for (; p; p = p->tcfc_next) {
> + hlist_for_each_entry_rcu(p, head, tcfc_head) {
> index++;
> if (index < s_i)
> continue;
> @@ -107,7 +100,7 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
> }
> }
> done:
> - read_unlock_bh(&hinfo->lock);
> + rcu_read_unlock_bh();
> if (n_i)
> cb->args[0] += n_i;
> return n_i;
> @@ -120,7 +113,9 @@ nla_put_failure:
> static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
> struct tcf_hashinfo *hinfo)
> {
> - struct tcf_common *p, *s_p;
> + struct hlist_head *head;
> + struct hlist_node *n;
> + struct tcf_common *p;
> struct nlattr *nest;
> int i = 0, n_i = 0;
>
> @@ -130,14 +125,11 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
> if (nla_put_string(skb, TCA_KIND, a->ops->kind))
> goto nla_put_failure;
> for (i = 0; i < (hinfo->hmask + 1); i++) {
> - p = hinfo->htab[tcf_hash(i, hinfo->hmask)];
> -
> - while (p != NULL) {
> - s_p = p->tcfc_next;
> + head = &hinfo->htab[tcf_hash(i, hinfo->hmask)];
> + hlist_for_each_entry_safe(p, n, head, tcfc_head) {
> if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo))
> module_put(a->ops->owner);
> n_i++;
> - p = s_p;
> }
> }
> if (nla_put_u32(skb, TCA_FCNT, n_i))
> @@ -168,15 +160,15 @@ EXPORT_SYMBOL(tcf_generic_walker);
>
> struct tcf_common *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo)
> {
> - struct tcf_common *p;
> + struct tcf_common *p = NULL;
> + struct hlist_head *head;
>
> - read_lock_bh(&hinfo->lock);
> - for (p = hinfo->htab[tcf_hash(index, hinfo->hmask)]; p;
> - p = p->tcfc_next) {
> + rcu_read_lock_bh();
> + head = &hinfo->htab[tcf_hash(index, hinfo->hmask)];
> + hlist_for_each_entry_rcu(p, head, tcfc_head)
> if (p->tcfc_index == index)
> break;
> - }
> - read_unlock_bh(&hinfo->lock);
> + rcu_read_unlock_bh();
>
> return p;
> }
> @@ -236,6 +228,7 @@ struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est,
> p->tcfc_bindcnt = 1;
>
> spin_lock_init(&p->tcfc_lock);
> + INIT_HLIST_NODE(&p->tcfc_head);
> p->tcfc_index = index ? index : tcf_hash_new_index(idx_gen, hinfo);
> p->tcfc_tm.install = jiffies;
> p->tcfc_tm.lastuse = jiffies;
> @@ -257,10 +250,10 @@ void tcf_hash_insert(struct tcf_common *p, struct tcf_hashinfo *hinfo)
> {
> unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask);
>
> - write_lock_bh(&hinfo->lock);
> - p->tcfc_next = hinfo->htab[h];
> - hinfo->htab[h] = p;
> - write_unlock_bh(&hinfo->lock);
> + spin_lock_bh(&hinfo->lock);
> + hlist_add_head_rcu(&p->tcfc_head, &hinfo->htab[h]);
> + spin_unlock_bh(&hinfo->lock);
> + synchronize_rcu();
Why is this needed?
> }
> EXPORT_SYMBOL(tcf_hash_insert);
>
> diff --git a/net/sched/act_police.c b/net/sched/act_police.c
> index f201576..26dffca 100644
> --- a/net/sched/act_police.c
> +++ b/net/sched/act_police.c
> @@ -60,18 +60,19 @@ struct tc_police_compat {
> static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *cb,
> int type, struct tc_action *a)
> {
> + struct hlist_head *head;
> struct tcf_common *p;
> int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
> struct nlattr *nest;
>
> - read_lock_bh(&police_hash_info.lock);
> + rcu_read_lock_bh();
>
> s_i = cb->args[0];
>
> for (i = 0; i < (POL_TAB_MASK + 1); i++) {
> - p = police_hash_info.htab[tcf_hash(i, POL_TAB_MASK)];
> + head = &police_hash_info.htab[tcf_hash(i, POL_TAB_MASK)];
>
> - for (; p; p = p->tcfc_next) {
> + hlist_for_each_entry_rcu(p, head, tcfc_head) {
> index++;
> if (index < s_i)
> continue;
> @@ -94,7 +95,7 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c
> }
> }
> done:
> - read_unlock_bh(&police_hash_info.lock);
> + rcu_read_unlock_bh();
> if (n_i)
> cb->args[0] += n_i;
> return n_i;
> @@ -106,25 +107,17 @@ nla_put_failure:
>
> static void tcf_police_destroy(struct tcf_police *p)
> {
> - unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK);
> - struct tcf_common **p1p;
> -
> - for (p1p = &police_hash_info.htab[h]; *p1p; p1p = &(*p1p)->tcfc_next) {
> - if (*p1p == &p->common) {
> - write_lock_bh(&police_hash_info.lock);
> - *p1p = p->tcf_next;
> - write_unlock_bh(&police_hash_info.lock);
> - gen_kill_estimator(&p->tcf_bstats,
> - &p->tcf_rate_est);
> - /*
> - * gen_estimator est_timer() might access p->tcf_lock
> - * or bstats, wait a RCU grace period before freeing p
> - */
> - kfree_rcu(p, tcf_rcu);
> - return;
> - }
> - }
> - WARN_ON(1);
> + spin_lock_bh(&police_hash_info.lock);
> + hlist_del_rcu(&p->tcf_head);
> + spin_unlock_bh(&police_hash_info.lock);
> + synchronize_rcu();
Why is this needed?
> + gen_kill_estimator(&p->tcf_bstats,
> + &p->tcf_rate_est);
> + /*
> + * gen_estimator est_timer() might access p->tcf_lock
> + * or bstats, wait a RCU grace period before freeing p
> + */
> + kfree_rcu(p, tcf_rcu);
> }
>
> static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
> @@ -259,10 +252,10 @@ override:
> police->tcf_index = parm->index ? parm->index :
> tcf_hash_new_index(&police_idx_gen, &police_hash_info);
> h = tcf_hash(police->tcf_index, POL_TAB_MASK);
> - write_lock_bh(&police_hash_info.lock);
> - police->tcf_next = police_hash_info.htab[h];
> - police_hash_info.htab[h] = &police->common;
> - write_unlock_bh(&police_hash_info.lock);
> + spin_lock_bh(&police_hash_info.lock);
> + hlist_add_head_rcu(&police->tcf_head, &police_hash_info.htab[h]);
> + spin_unlock_bh(&police_hash_info.lock);
> + synchronize_rcu();
Why is this needed?
>
> a->priv = police;
> return ret;
Really, before sending RCU conversions, you need to understand how it
works.
There is plenty of documentation, and there are plenty of samples; I do not
need to explain further.
Anyway, do you _really_ need RCU in management path at all ?
RCU is generally harder to understand and maintain than plain
traditional locking, so its use should be limited to the places
it really is needed.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists