[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <55E4E89A.8060508@redhat.com>
Date: Mon, 31 Aug 2015 19:51:54 -0400
From: Vlad Yasevich <vyasevic@...hat.com>
To: Xin Long <lucien.xin@...il.com>,
network dev <netdev@...r.kernel.org>
CC: mleitner@...hat.com, daniel@...earbox.net, davem@...emloft.net
Subject: Re: [PATCH net] sctp: support global vtag assochash and per endpoint
s(d)port assochash table
On 08/31/2015 01:44 PM, Xin Long wrote:
> for telecom center, the usual case is that a server is connected by thousands
> of clients. but if the server with only one enpoint(udp style) use the same
> sport and dport to communicate with every clients, and every assoc in server
> will be hashed in the same chain of global assoc hashtable due to currently we
> choose dport and sport as the hash key.
>
> when a packet is received, sctp_rcv try to find the assoc with sport and dport,
> since that chain is too long to find it fast, it make the performance turn to
> very low, some test data is as follow:
>
> in server:
> $./ss [start a udp server there]
> in client:
> $./cc [start 2500 sockets to connect server with same port and different ip,
> and use one of them to send data to server]
>
> *spend time is 854s, send pkt is 10000000*
>
> in server: #perf top
> 46.60% [kernel] [k] sctp_assoc_is_match
> 8.81% [kernel] [k] sctp_v4_cmp_addr
> 5.15% [kernel] [k] sctp_assoc_lookup_paddr
> ...
>
> in client: #perf top
> 88.42% [kernel] [k] __sctp_endpoint_lookup_assoc
> 2.06% libnl-3.so.200.16.1 [.] nl_object_identical
> 1.23% libnl-3.so.200.16.1 [.] nl_addr_cmp
> ...
>
> we need to change the way to calculate the hash key, vtag is good value for
> that, insteading the sport and dport. this way can work well for looking for
> assoc in sctp_rcv.
> becides, for the clients, if we turn the dport and sport global hash to per
> endpoint's, which can make sctp_sendmsg look up assoc more quickly.
>
> after that, the data of the same test is:
>
> *spend time is 25s, send pkt is 10000000*
>
> in server: #perf top
> 9.92% libnl-3.so.200.16.1 [.] nl_object_identical
> 6.05% libnl-3.so.200.16.1 [.] nl_addr_cmp
> 4.72% libc-2.17.so [.] __memcmp_sse2
> ...
>
> in client: #perf top
> 6.79% libc-2.17.so [.] __libc_calloc
> 6.50% [kernel] [k] memcpy
> 6.35% libc-2.17.so [.] _int_free
> ...
>
> Signed-off-by: Xin Long <lucien.xin@...il.com>
> ---
> include/net/sctp/sctp.h | 8 +--
> include/net/sctp/structs.h | 8 ++-
> net/sctp/endpointola.c | 22 +++++--
> net/sctp/input.c | 160 +++++++++++++++++++++++++++++----------------
> 4 files changed, 129 insertions(+), 69 deletions(-)
>
> diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
> index ce13cf2..df14452 100644
> --- a/include/net/sctp/sctp.h
> +++ b/include/net/sctp/sctp.h
> @@ -524,18 +524,16 @@ static inline int sctp_assoc_hashfn(struct net *net, __u16 lport, __u16 rport)
> {
> int h = (lport << 16) + rport + net_hash_mix(net);
> h ^= h>>8;
> - return h & (sctp_assoc_hashsize - 1);
> + return h & (256 - 1);
In addition to what David said and looking at it from a different angle... 256 buckets
may not be enough for someone with a single endpoint and alot of associations. You
will still hit a long chain on INIT and COOKIE-ECHO chunks.
Switching to using rhashtable for association hash would be very nice.
Also, see if you can split this into 2 parts, one that does vtag hash and
the other is refactoring. It would make it much easier to review.
-vlad
> }
>
> /* This is the hash function for the association hash table. This is
> * not used yet, but could be used as a better hash function when
> * we have a vtag.
> */
> -static inline int sctp_vtag_hashfn(__u16 lport, __u16 rport, __u32 vtag)
> +static inline int sctp_vtag_hashfn(struct net *net, __u32 vtag)
> {
> - int h = (lport << 16) + rport;
> - h ^= vtag;
> - return h & (sctp_assoc_hashsize - 1);
> + return vtag & (sctp_assoc_hashsize - 1);
> }
>
> #define sctp_for_each_hentry(epb, head) \
> diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
> index 495c87e..e0edfed 100644
> --- a/include/net/sctp/structs.h
> +++ b/include/net/sctp/structs.h
> @@ -1150,6 +1150,9 @@ struct sctp_ep_common {
> struct hlist_node node;
> int hashent;
>
> + struct hlist_node node_ep;
> + int hashent_ep;
> +
> /* Runtime type information. What kind of endpoint is this? */
> sctp_endpoint_type_t type;
>
> @@ -1210,6 +1213,8 @@ struct sctp_endpoint {
> /* This is really a list of struct sctp_association entries. */
> struct list_head asocs;
>
> + struct sctp_hashbucket *asocshash;
> +
> /* Secret Key: A secret key used by this endpoint to compute
> * the MAC. This SHOULD be a cryptographic quality
> * random number with a sufficient length.
> @@ -1272,7 +1277,8 @@ int sctp_endpoint_is_peeled_off(struct sctp_endpoint *,
> const union sctp_addr *);
> struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *,
> struct net *, const union sctp_addr *);
> -int sctp_has_association(struct net *net, const union sctp_addr *laddr,
> +int sctp_has_association(struct net *net, struct sctp_endpoint *ep,
> + const union sctp_addr *laddr,
> const union sctp_addr *paddr);
>
> int sctp_verify_init(struct net *net, const struct sctp_endpoint *ep,
> diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
> index 9da76ba..adcaded 100644
> --- a/net/sctp/endpointola.c
> +++ b/net/sctp/endpointola.c
> @@ -62,7 +62,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
> struct sctp_hmac_algo_param *auth_hmacs = NULL;
> struct sctp_chunks_param *auth_chunks = NULL;
> struct sctp_shared_key *null_key;
> - int err;
> + int err, i;
>
> ep->digest = kzalloc(SCTP_SIGNATURE_SIZE, gfp);
> if (!ep->digest)
> @@ -133,6 +133,16 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
> /* Create the lists of associations. */
> INIT_LIST_HEAD(&ep->asocs);
>
> + /* Create hash table of associations. */
> + ep->asocshash = kmalloc(256 * sizeof(struct sctp_hashbucket), gfp);
> + if (!ep->asocshash)
> + goto nomem_asocshash;
> +
> + for (i = 0; i < 256; i++) {
> + rwlock_init(&ep->asocshash[i].lock);
> + INIT_HLIST_HEAD(&ep->asocshash[i].chain);
> + }
> +
> /* Use SCTP specific send buffer space queues. */
> ep->sndbuf_policy = net->sctp.sndbuf_policy;
>
> @@ -170,12 +180,13 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
> nomem_hmacs:
> sctp_auth_destroy_keys(&ep->endpoint_shared_keys);
> nomem:
> + kfree(ep->asocshash);
> +nomem_asocshash:
> /* Free all allocations */
> kfree(auth_hmacs);
> kfree(auth_chunks);
> kfree(ep->digest);
> return NULL;
> -
> }
>
> /* Create a sctp_endpoint with all that boring stuff initialized.
> @@ -342,9 +353,9 @@ static struct sctp_association *__sctp_endpoint_lookup_assoc(
>
> hash = sctp_assoc_hashfn(sock_net(ep->base.sk), ep->base.bind_addr.port,
> rport);
> - head = &sctp_assoc_hashtable[hash];
> + head = &ep->asocshash[hash];
> read_lock(&head->lock);
> - sctp_for_each_hentry(epb, &head->chain) {
> + hlist_for_each_entry(epb, &head->chain, node_ep) {
> tmp = sctp_assoc(epb);
> if (tmp->ep != ep || rport != tmp->peer.port)
> continue;
> @@ -356,6 +367,7 @@ static struct sctp_association *__sctp_endpoint_lookup_assoc(
> break;
> }
> }
> +
> read_unlock(&head->lock);
> out:
> return asoc;
> @@ -391,7 +403,7 @@ int sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep,
> * so the address_list can not change.
> */
> list_for_each_entry(addr, &bp->address_list, list) {
> - if (sctp_has_association(net, &addr->a, paddr))
> + if (sctp_has_association(net, ep, &addr->a, paddr))
> return 1;
> }
>
> diff --git a/net/sctp/input.c b/net/sctp/input.c
> index b6493b3..48dc724 100644
> --- a/net/sctp/input.c
> +++ b/net/sctp/input.c
> @@ -59,18 +59,26 @@
>
> /* Forward declarations for internal helpers. */
> static int sctp_rcv_ootb(struct sk_buff *);
> -static struct sctp_association *__sctp_rcv_lookup(struct net *net,
> - struct sk_buff *skb,
> - const union sctp_addr *paddr,
> - const union sctp_addr *laddr,
> - struct sctp_transport **transportp);
> static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(struct net *net,
> const union sctp_addr *laddr);
> -static struct sctp_association *__sctp_lookup_association(
> +static struct sctp_association *__sctp_vtag_lookup_assoc(
> struct net *net,
> + __u32 vtag,
> const union sctp_addr *local,
> const union sctp_addr *peer,
> struct sctp_transport **pt);
> +static struct sctp_association *__sctp_addrep_lookup_assoc(
> + struct net *net,
> + struct sctp_endpoint *ep,
> + const union sctp_addr *local,
> + const union sctp_addr *peer,
> + struct sctp_transport **pt);
> +static struct sctp_association *__sctp_rcv_lookup_harder(
> + struct net *net,
> + struct sk_buff *skb,
> + struct sctp_endpoint *ep,
> + const union sctp_addr *laddr,
> + struct sctp_transport **transportp);
>
> static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb);
>
> @@ -107,7 +115,7 @@ struct sctp_input_cb {
> int sctp_rcv(struct sk_buff *skb)
> {
> struct sock *sk;
> - struct sctp_association *asoc;
> + struct sctp_association *asoc = NULL;
> struct sctp_endpoint *ep = NULL;
> struct sctp_ep_common *rcvr;
> struct sctp_transport *transport = NULL;
> @@ -171,11 +179,21 @@ int sctp_rcv(struct sk_buff *skb)
> !af->addr_valid(&dest, NULL, skb))
> goto discard_it;
>
> - asoc = __sctp_rcv_lookup(net, skb, &src, &dest, &transport);
> + if (sh->vtag)
> + asoc = __sctp_vtag_lookup_assoc(net, ntohl(sh->vtag), &dest,
> + &src, &transport);
>
> - if (!asoc)
> + if (!asoc) {
> ep = __sctp_rcv_lookup_endpoint(net, &dest);
>
> + asoc = __sctp_addrep_lookup_assoc(net, ep, &dest,
> + &src, &transport);
> + if (!asoc)
> + asoc = __sctp_rcv_lookup_harder(net, skb, ep, &dest,
> + &transport);
> + if (asoc)
> + sctp_endpoint_put(ep);
> + }
> /* Retrieve the common input handling substructure. */
> rcvr = asoc ? &asoc->base : &ep->base;
> sk = rcvr->sk;
> @@ -476,7 +494,8 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb,
> union sctp_addr daddr;
> struct sctp_af *af;
> struct sock *sk = NULL;
> - struct sctp_association *asoc;
> + struct sctp_association *asoc = NULL;
> + struct sctp_endpoint *ep;
> struct sctp_transport *transport = NULL;
> struct sctp_init_chunk *chunkhdr;
> __u32 vtag = ntohl(sctphdr->vtag);
> @@ -496,7 +515,15 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb,
> /* Look for an association that matches the incoming ICMP error
> * packet.
> */
> - asoc = __sctp_lookup_association(net, &saddr, &daddr, &transport);
> + if (vtag)
> + asoc = __sctp_vtag_lookup_assoc(net, vtag, &saddr, &daddr,
> + &transport);
> + if (!asoc) {
> + ep = __sctp_rcv_lookup_endpoint(net, &saddr);
> + asoc = __sctp_addrep_lookup_assoc(net, ep, &saddr, &saddr,
> + &transport);
> + sctp_endpoint_put(ep);
> + }
> if (!asoc)
> return NULL;
>
> @@ -792,14 +819,22 @@ static void __sctp_hash_established(struct sctp_association *asoc)
> epb = &asoc->base;
>
> /* Calculate which chain this entry will belong to. */
> - epb->hashent = sctp_assoc_hashfn(net, epb->bind_addr.port,
> - asoc->peer.port);
> + epb->hashent = sctp_vtag_hashfn(net, asoc->c.my_vtag);
>
> head = &sctp_assoc_hashtable[epb->hashent];
>
> write_lock(&head->lock);
> hlist_add_head(&epb->node, &head->chain);
> write_unlock(&head->lock);
> +
> + /* add it to per endpoint hashtable*/
> + epb->hashent_ep = sctp_assoc_hashfn(net, epb->bind_addr.port,
> + asoc->peer.port);
> + head = &asoc->ep->asocshash[epb->hashent_ep];
> +
> + write_lock(&head->lock);
> + hlist_add_head(&epb->node_ep, &head->chain);
> + write_unlock(&head->lock);
> }
>
> /* Add an association to the hash. Local BH-safe. */
> @@ -822,14 +857,22 @@ static void __sctp_unhash_established(struct sctp_association *asoc)
>
> epb = &asoc->base;
>
> - epb->hashent = sctp_assoc_hashfn(net, epb->bind_addr.port,
> - asoc->peer.port);
> + epb->hashent = sctp_vtag_hashfn(net, asoc->c.my_vtag);
>
> head = &sctp_assoc_hashtable[epb->hashent];
>
> write_lock(&head->lock);
> hlist_del_init(&epb->node);
> write_unlock(&head->lock);
> +
> + /* del it from per endpoint hashtable */
> + epb->hashent_ep = sctp_assoc_hashfn(net, epb->bind_addr.port,
> + asoc->peer.port);
> + head = &asoc->ep->asocshash[epb->hashent_ep];
> +
> + write_lock(&head->lock);
> + hlist_del_init(&epb->node_ep);
> + write_unlock(&head->lock);
> }
>
> /* Remove association from the hash table. Local BH-safe. */
> @@ -843,9 +886,9 @@ void sctp_unhash_established(struct sctp_association *asoc)
> local_bh_enable();
> }
>
> -/* Look up an association. */
> -static struct sctp_association *__sctp_lookup_association(
> +static struct sctp_association *__sctp_vtag_lookup_assoc(
> struct net *net,
> + __u32 vtag,
> const union sctp_addr *local,
> const union sctp_addr *peer,
> struct sctp_transport **pt)
> @@ -856,12 +899,9 @@ static struct sctp_association *__sctp_lookup_association(
> struct sctp_transport *transport;
> int hash;
>
> - /* Optimize here for direct hit, only listening connections can
> - * have wildcards anyways.
> - */
> - hash = sctp_assoc_hashfn(net, ntohs(local->v4.sin_port),
> - ntohs(peer->v4.sin_port));
> + hash = sctp_vtag_hashfn(net, vtag);
> head = &sctp_assoc_hashtable[hash];
> +
> read_lock(&head->lock);
> sctp_for_each_hentry(epb, &head->chain) {
> asoc = sctp_assoc(epb);
> @@ -873,7 +913,6 @@ static struct sctp_association *__sctp_lookup_association(
> read_unlock(&head->lock);
>
> return NULL;
> -
> hit:
> *pt = transport;
> sctp_association_hold(asoc);
> @@ -881,31 +920,17 @@ hit:
> return asoc;
> }
>
> -/* Look up an association. BH-safe. */
> -static
> -struct sctp_association *sctp_lookup_association(struct net *net,
> - const union sctp_addr *laddr,
> - const union sctp_addr *paddr,
> - struct sctp_transport **transportp)
> -{
> - struct sctp_association *asoc;
> -
> - local_bh_disable();
> - asoc = __sctp_lookup_association(net, laddr, paddr, transportp);
> - local_bh_enable();
> -
> - return asoc;
> -}
> -
> /* Is there an association matching the given local and peer addresses? */
> int sctp_has_association(struct net *net,
> + struct sctp_endpoint *ep,
> const union sctp_addr *laddr,
> const union sctp_addr *paddr)
> {
> struct sctp_association *asoc;
> struct sctp_transport *transport;
>
> - if ((asoc = sctp_lookup_association(net, laddr, paddr, &transport))) {
> + asoc = __sctp_addrep_lookup_assoc(net, ep, laddr, paddr, &transport);
> + if (asoc) {
> sctp_association_put(asoc);
> return 1;
> }
> @@ -933,6 +958,7 @@ int sctp_has_association(struct net *net,
> */
> static struct sctp_association *__sctp_rcv_init_lookup(struct net *net,
> struct sk_buff *skb,
> + struct sctp_endpoint *ep,
> const union sctp_addr *laddr, struct sctp_transport **transportp)
> {
> struct sctp_association *asoc;
> @@ -972,7 +998,8 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net,
>
> af->from_addr_param(paddr, params.addr, sh->source, 0);
>
> - asoc = __sctp_lookup_association(net, laddr, paddr, &transport);
> + asoc = __sctp_addrep_lookup_assoc(net, ep, laddr, paddr,
> + &transport);
> if (asoc)
> return asoc;
> }
> @@ -997,6 +1024,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net,
> static struct sctp_association *__sctp_rcv_asconf_lookup(
> struct net *net,
> sctp_chunkhdr_t *ch,
> + struct sctp_endpoint *ep,
> const union sctp_addr *laddr,
> __be16 peer_port,
> struct sctp_transport **transportp)
> @@ -1015,7 +1043,7 @@ static struct sctp_association *__sctp_rcv_asconf_lookup(
>
> af->from_addr_param(&paddr, param, peer_port, 0);
>
> - return __sctp_lookup_association(net, laddr, &paddr, transportp);
> + return __sctp_addrep_lookup_assoc(net, ep, laddr, &paddr, transportp);
> }
>
>
> @@ -1030,6 +1058,7 @@ static struct sctp_association *__sctp_rcv_asconf_lookup(
> */
> static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net,
> struct sk_buff *skb,
> + struct sctp_endpoint *ep,
> const union sctp_addr *laddr,
> struct sctp_transport **transportp)
> {
> @@ -1072,7 +1101,7 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net,
> case SCTP_CID_ASCONF:
> if (have_auth || net->sctp.addip_noauth)
> asoc = __sctp_rcv_asconf_lookup(
> - net, ch, laddr,
> + net, ch, ep, laddr,
> sctp_hdr(skb)->source,
> transportp);
> default:
> @@ -1097,6 +1126,7 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net,
> */
> static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net,
> struct sk_buff *skb,
> + struct sctp_endpoint *ep,
> const union sctp_addr *laddr,
> struct sctp_transport **transportp)
> {
> @@ -1114,28 +1144,42 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net,
>
> /* If this is INIT/INIT-ACK look inside the chunk too. */
> if (ch->type == SCTP_CID_INIT || ch->type == SCTP_CID_INIT_ACK)
> - return __sctp_rcv_init_lookup(net, skb, laddr, transportp);
> + return __sctp_rcv_init_lookup(net, skb, ep, laddr, transportp);
>
> - return __sctp_rcv_walk_lookup(net, skb, laddr, transportp);
> + return __sctp_rcv_walk_lookup(net, skb, ep, laddr, transportp);
> }
>
> -/* Lookup an association for an inbound skb. */
> -static struct sctp_association *__sctp_rcv_lookup(struct net *net,
> - struct sk_buff *skb,
> - const union sctp_addr *paddr,
> - const union sctp_addr *laddr,
> - struct sctp_transport **transportp)
> +static struct sctp_association *__sctp_addrep_lookup_assoc(
> + struct net *net,
> + struct sctp_endpoint *ep,
> + const union sctp_addr *local,
> + const union sctp_addr *peer,
> + struct sctp_transport **pt)
> {
> + struct sctp_hashbucket *head;
> + struct sctp_ep_common *epb;
> struct sctp_association *asoc;
> + struct sctp_transport *transport;
> + int hash;
>
> - asoc = __sctp_lookup_association(net, laddr, paddr, transportp);
> + hash = sctp_assoc_hashfn(net, ntohs(local->v4.sin_port),
> + ntohs(peer->v4.sin_port));
> + head = &ep->asocshash[hash];
>
> - /* Further lookup for INIT/INIT-ACK packets.
> - * SCTP Implementors Guide, 2.18 Handling of address
> - * parameters within the INIT or INIT-ACK.
> - */
> - if (!asoc)
> - asoc = __sctp_rcv_lookup_harder(net, skb, laddr, transportp);
> + read_lock(&head->lock);
> + hlist_for_each_entry(epb, &head->chain, node_ep) {
> + asoc = sctp_assoc(epb);
> + transport = sctp_assoc_is_match(asoc, net, local, peer);
> + if (transport)
> + goto hit;
> + }
> +
> + read_unlock(&head->lock);
>
> + return NULL;
> +hit:
> + *pt = transport;
> + sctp_association_hold(asoc);
> + read_unlock(&head->lock);
> return asoc;
> }
>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists