Message-ID: <49CA8350.5040407@cosmosbay.com>
Date: Wed, 25 Mar 2009 20:17:36 +0100
From: Eric Dumazet <dada1@...mosbay.com>
To: Patrick McHardy <kaber@...sh.net>
CC: mbizon@...ebox.fr, "Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>,
Joakim Tjernlund <Joakim.Tjernlund@...nsmode.se>,
avorontsov@...mvista.com, netdev@...r.kernel.org,
Netfilter Developers <netfilter-devel@...r.kernel.org>
Subject: Re: [PATCH] conntrack: use SLAB_DESTROY_BY_RCU for nf_conn structs
Patrick McHardy wrote:
> Eric Dumazet wrote:
>> Here is take 2 of the patch with proper ref counting on dumping.
>
> Thanks, one final question about the seq-file handling:
>
>> diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
>> index 6ba5c55..0b870b9 100644
>> --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
>> +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
>> @@ -25,30 +25,30 @@ struct ct_iter_state {
>> unsigned int bucket;
>> };
>>
>> -static struct hlist_node *ct_get_first(struct seq_file *seq)
>> +static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
>> {
>> struct net *net = seq_file_net(seq);
>> struct ct_iter_state *st = seq->private;
>> - struct hlist_node *n;
>> + struct hlist_nulls_node *n;
>>
>> for (st->bucket = 0;
>> st->bucket < nf_conntrack_htable_size;
>> st->bucket++) {
>> n = rcu_dereference(net->ct.hash[st->bucket].first);
>> - if (n)
>> + if (!is_a_nulls(n))
>> return n;
>> }
>> return NULL;
>> }
>>
>> -static struct hlist_node *ct_get_next(struct seq_file *seq,
>> - struct hlist_node *head)
>> +static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
>> + struct hlist_nulls_node *head)
>> {
>> struct net *net = seq_file_net(seq);
>> struct ct_iter_state *st = seq->private;
>>
>> head = rcu_dereference(head->next);
>> - while (head == NULL) {
>> + while (is_a_nulls(head)) {
>> if (++st->bucket >= nf_conntrack_htable_size)
>> return NULL;
>> head = rcu_dereference(net->ct.hash[st->bucket].first);
>> @@ -56,9 +56,9 @@ static struct hlist_node *ct_get_next(struct seq_file *seq,
>> return head;
>> }
>>
>> -static struct hlist_node *ct_get_idx(struct seq_file *seq, loff_t pos)
>> +static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos)
>> {
>> - struct hlist_node *head = ct_get_first(seq);
>> + struct hlist_nulls_node *head = ct_get_first(seq);
>>
>> if (head)
>> while (pos && (head = ct_get_next(seq, head)))
>> @@ -87,69 +87,76 @@ static void ct_seq_stop(struct seq_file *s, void *v)
>>
>> static int ct_seq_show(struct seq_file *s, void *v)
>> {
>> - const struct nf_conntrack_tuple_hash *hash = v;
>> - const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash);
>> + struct nf_conntrack_tuple_hash *hash = v;
>> + struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash);
>> const struct nf_conntrack_l3proto *l3proto;
>> const struct nf_conntrack_l4proto *l4proto;
>> + int ret = 0;
>>
>> NF_CT_ASSERT(ct);
>> + if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
>> + return 0;
>
> Can we assume the next pointer still points to the next entry
> in the same chain after the refcount dropped to zero?
>
>
>
We are looking at chain N.
If we cannot atomic_inc() the refcount, we hit a deleted entry.
If we can atomic_inc(), we may have hit an entry that just moved to another chain X.
When we reach the end of that chain, we continue the search at chain N+1, so we only
skip the tail of the previous chain (N). We can 'forget' some entries, and we can
print a given entry several times.
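
(For reference, a minimal sketch of the lookup-side discipline this implies with
SLAB_DESTROY_BY_RCU; illustrative only, not the patch itself: the real lookup is
nf_conntrack_find_get(), and the sketch assumes each chain head was initialized
with its bucket index as its nulls value, so get_nulls_value() can tell a foreign
chain end from our own.)

#include <linux/rculist_nulls.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>

static struct nf_conn *
ct_lookup_get(struct net *net, const struct nf_conntrack_tuple *tuple,
              unsigned int hash)
{
        struct nf_conntrack_tuple_hash *h;
        struct hlist_nulls_node *n;
        struct nf_conn *ct;

begin:
        hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) {
                if (!nf_ct_tuple_equal(tuple, &h->tuple))
                        continue;
                ct = nf_ct_tuplehash_to_ctrack(h);
                /* refcount already dropped to zero: entry is being freed */
                if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
                        continue;
                /* the slab object may have been freed and reused for another
                 * tuple between the compare and the refcount increment */
                if (unlikely(!nf_ct_tuple_equal(tuple, &h->tuple))) {
                        nf_ct_put(ct);
                        goto begin;
                }
                return ct;
        }
        /* chain end with a foreign nulls value: the entry we were walking
         * moved to another chain, restart the lookup */
        if (get_nulls_value(n) != hash)
                goto begin;
        return NULL;
}

The second nf_ct_tuple_equal() check is what makes SLAB_DESTROY_BY_RCU safe on
the lookup side: taking the reference only proves the object is alive, not that
it is still the entry we were looking for.
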
We could solve this by:
1) Checking the hash value: if it is not the expected one, go back to the head of
chain N (potentially re-printing already handled entries); see the sketch after
this list. So it is not a *perfect* solution.
2) Using locking to forbid writers (as done for UDP/TCP), but that is expensive and
won't solve the other problem:
we won't avoid emitting the same entry several times anyway (this is a flaw of the
current seq_file handling, since we 'count' the entries to be skipped, and this
count is wrong if some entries were deleted or inserted meanwhile).
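
Here is a sketch of solution 1) applied to ct_get_next() in the same file (again
assuming each chain head's nulls value encodes its bucket number):

static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
                                            struct hlist_nulls_node *head)
{
        struct net *net = seq_file_net(seq);
        struct ct_iter_state *st = seq->private;

        head = rcu_dereference(head->next);
        while (is_a_nulls(head)) {
                if (likely(get_nulls_value(head) == st->bucket)) {
                        /* genuine end of chain N: move on to chain N+1 */
                        if (++st->bucket >= nf_conntrack_htable_size)
                                return NULL;
                }
                /* otherwise we ended on a foreign chain: re-read the head of
                 * the bucket we are dumping (may re-print some entries) */
                head = rcu_dereference(net->ct.hash[st->bucket].first);
        }
        return head;
}

If the nulls value does not match st->bucket we reached the tail of some other
chain, so we restart the current bucket; that restart is where the possible
re-printing comes from.
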
We have the same problem with /proc/net/udp & /proc/net/tcp; I am not sure we should care...
Also, the current resizing code can hurt a /proc/net/ip_conntrack reader, since the
hash table can be switched while it is dumping: many entries might be lost or shown twice...