[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <A2BAEFC30C8FD34388F02C9B3121859D1C35FBC2@eusaamb103.ericsson.se>
Date: Mon, 19 Jan 2015 16:07:54 +0000
From: Jon Maloy <jon.maloy@...csson.com>
To: Erik Hugne <erik.hugne@...csson.com>,
Richard Alpe <richard.alpe@...csson.com>,
"ying.xue@...driver.com" <ying.xue@...driver.com>,
"netdev@...r.kernel.org" <netdev@...r.kernel.org>
CC: "tipc-discussion@...ts.sourceforge.net"
<tipc-discussion@...ts.sourceforge.net>
Subject: RE: [PATCH net-next] tipc: ratelimit network event traces
> -----Original Message-----
> From: Erik Hugne
> Sent: January-19-15 4:03 AM
> To: Richard Alpe; ying.xue@...driver.com; Jon Maloy;
> netdev@...r.kernel.org
> Cc: tipc-discussion@...ts.sourceforge.net; Erik Hugne
> Subject: [PATCH net-next] tipc: ratelimit network event traces
>
> From: Erik Hugne <erik.hugne@...csson.com>
>
> If a large number of namespaces are spawned on a node and TIPC is enabled in
> each of them, the excessive printk tracing of network events will cause the
> system to grind to a near halt.
The patch is OK, but I don't quite understand how this can happen. Are you connecting
dozens of nodes in dozens of namespaces? How many "Established link" printouts
are there? Even if there are hundreds of them in a burst, I don't quite understand how
this can kill the whole system. Just curious.
///jon
> We fix this by adding rate limiting to the info/warning/error logs regarding
> link state and node availability.
>
> Signed-off-by: Erik Hugne <erik.hugne@...csson.com>
> Reviewed-by: Ying Xue <ying.xue@...driver.com>
> ---
> net/tipc/link.c | 21 +++++++++++---------- net/tipc/node.c | 24
> +++++++++++++-----------
> 2 files changed, 24 insertions(+), 21 deletions(-)
>
> diff --git a/net/tipc/link.c b/net/tipc/link.c index 193bc15..bedb590 100644
> --- a/net/tipc/link.c
> +++ b/net/tipc/link.c
> @@ -538,8 +538,8 @@ static void link_state_event(struct tipc_link *l_ptr,
> unsigned int event)
> link_set_timer(l_ptr, cont_intv / 4);
> break;
> case RESET_MSG:
> - pr_info("%s<%s>, requested by peer\n",
> link_rst_msg,
> - l_ptr->name);
> + pr_info_ratelimited("%s<%s>, requested by peer\n",
> + link_rst_msg, l_ptr->name);
> tipc_link_reset(l_ptr);
> l_ptr->state = RESET_RESET;
> l_ptr->fsm_msg_cnt = 0;
> @@ -549,7 +549,8 @@ static void link_state_event(struct tipc_link *l_ptr,
> unsigned int event)
> link_set_timer(l_ptr, cont_intv);
> break;
> default:
> - pr_err("%s%u in WW state\n", link_unk_evt, event);
> + pr_err_ratelimited("%s%u in WW state\n",
> link_unk_evt,
> + event);
> }
> break;
> case WORKING_UNKNOWN:
> @@ -561,8 +562,8 @@ static void link_state_event(struct tipc_link *l_ptr,
> unsigned int event)
> link_set_timer(l_ptr, cont_intv);
> break;
> case RESET_MSG:
> - pr_info("%s<%s>, requested by peer while
> probing\n",
> - link_rst_msg, l_ptr->name);
> + pr_info_ratelimited("%s<%s>, requested by peer
> while probing\n",
> + link_rst_msg, l_ptr->name);
> tipc_link_reset(l_ptr);
> l_ptr->state = RESET_RESET;
> l_ptr->fsm_msg_cnt = 0;
> @@ -588,8 +589,8 @@ static void link_state_event(struct tipc_link *l_ptr,
> unsigned int event)
> l_ptr->fsm_msg_cnt++;
> link_set_timer(l_ptr, cont_intv / 4);
> } else { /* Link has failed */
> - pr_warn("%s<%s>, peer not responding\n",
> - link_rst_msg, l_ptr->name);
> + pr_warn_ratelimited("%s<%s>, peer not
> responding\n",
> + link_rst_msg, l_ptr-
> >name);
> tipc_link_reset(l_ptr);
> l_ptr->state = RESET_UNKNOWN;
> l_ptr->fsm_msg_cnt = 0;
> @@ -1568,9 +1569,9 @@ static void tipc_link_proto_rcv(struct net *net,
> struct tipc_link *l_ptr,
>
> if (msg_linkprio(msg) &&
> (msg_linkprio(msg) != l_ptr->priority)) {
> - pr_warn("%s<%s>, priority change %u->%u\n",
> - link_rst_msg, l_ptr->name, l_ptr->priority,
> - msg_linkprio(msg));
> + pr_warn_ratelimited("%s<%s>, priority change %u-
> >%u\n",
> + link_rst_msg, l_ptr->name,
> + l_ptr->priority, msg_linkprio(msg));
> l_ptr->priority = msg_linkprio(msg);
> tipc_link_reset(l_ptr); /* Enforce change to take
> effect */
> break;
> diff --git a/net/tipc/node.c b/net/tipc/node.c index b1eb092..01a03a7 100644
> --- a/net/tipc/node.c
> +++ b/net/tipc/node.c
> @@ -230,8 +230,8 @@ void tipc_node_link_up(struct tipc_node *n_ptr,
> struct tipc_link *l_ptr)
> n_ptr->action_flags |= TIPC_NOTIFY_LINK_UP;
> n_ptr->link_id = l_ptr->peer_bearer_id << 16 | l_ptr->bearer_id;
>
> - pr_info("Established link <%s> on network plane %c\n",
> - l_ptr->name, l_ptr->net_plane);
> + pr_info_ratelimited("Established link <%s> on network plane %c\n",
> + l_ptr->name, l_ptr->net_plane);
>
> if (!active[0]) {
> active[0] = active[1] = l_ptr;
> @@ -239,7 +239,8 @@ void tipc_node_link_up(struct tipc_node *n_ptr,
> struct tipc_link *l_ptr)
> goto exit;
> }
> if (l_ptr->priority < active[0]->priority) {
> - pr_info("New link <%s> becomes standby\n", l_ptr->name);
> + pr_info_ratelimited("New link <%s> becomes standby\n",
> + l_ptr->name);
> goto exit;
> }
> tipc_link_dup_queue_xmit(active[0], l_ptr); @@ -247,9 +248,10 @@
> void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
> active[0] = l_ptr;
> goto exit;
> }
> - pr_info("Old link <%s> becomes standby\n", active[0]->name);
> + pr_info_ratelimited("Old link <%s> becomes standby\n",
> +active[0]->name);
> if (active[1] != active[0])
> - pr_info("Old link <%s> becomes standby\n", active[1]-
> >name);
> + pr_info_ratelimited("Old link <%s> becomes standby\n",
> + active[1]->name);
> active[0] = active[1] = l_ptr;
> exit:
> /* Leave room for changeover header when returning 'mtu' to users:
> */ @@ -297,12 +299,12 @@ void tipc_node_link_down(struct tipc_node
> *n_ptr, struct tipc_link *l_ptr)
> n_ptr->link_id = l_ptr->peer_bearer_id << 16 | l_ptr->bearer_id;
>
> if (!tipc_link_is_active(l_ptr)) {
> - pr_info("Lost standby link <%s> on network plane %c\n",
> - l_ptr->name, l_ptr->net_plane);
> + pr_info_ratelimited("Lost standby link <%s> on network
> plane %c\n",
> + l_ptr->name, l_ptr->net_plane);
> return;
> }
> - pr_info("Lost link <%s> on network plane %c\n",
> - l_ptr->name, l_ptr->net_plane);
> + pr_info_ratelimited("Lost link <%s> on network plane %c\n",
> + l_ptr->name, l_ptr->net_plane);
>
> active = &n_ptr->active_links[0];
> if (active[0] == l_ptr)
> @@ -380,8 +382,8 @@ static void node_lost_contact(struct tipc_node
> *n_ptr)
> char addr_string[16];
> u32 i;
>
> - pr_info("Lost contact with %s\n",
> - tipc_addr_string_fill(addr_string, n_ptr->addr));
> + pr_info_ratelimited("Lost contact with %s\n",
> + tipc_addr_string_fill(addr_string, n_ptr->addr));
>
> /* Flush broadcast link info associated with lost node */
> if (n_ptr->bclink.recv_permitted) {
> --
> 2.1.3
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists