lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1515440080-27377-10-git-send-email-jon.maloy@ericsson.com>
Date:   Mon, 8 Jan 2018 20:34:40 +0100
From:   Jon Maloy <jon.maloy@...csson.com>
To:     <davem@...emloft.net>, <netdev@...r.kernel.org>
CC:     <mohan.krishna.ghanta.krishnamurthy@...csson.com>,
        <tung.q.nguyen@...tech.com.au>, <hoang.h.le@...tech.com.au>,
        <jon.maloy@...csson.com>, <canh.d.luu@...tech.com.au>,
        <ying.xue@...driver.com>, <tipc-discussion@...ts.sourceforge.net>
Subject: [net-next 9/9] tipc: improve poll() for group member socket

The current criteria for returning POLLOUT from a group member socket is
too simplistic. It basically returns POLLOUT as soon as the group has
external destinations, something obviously leading to a lot of spinning
during destination congestion situations. At the same time, the internal
congestion handling is unnecessarily complex.

We now change this as follows.

- We introduce an 'open' flag in  struct tipc_group. This flag is used
  only to help poll() get the setting of POLLOUT right, and *not* for
  congeston handling as such. This means that a user can choose to
  ignore an  EAGAIN for a destination and go on sending messages to
  other destinations in the group if he wants to.

- The flag is set to false every time we return EAGAIN on a send call.

- The flag is set to true every time any member, i.e., not necessarily
  the member that caused EAGAIN, is removed from the small_win list.

- We remove the group member 'usr_pending' flag. The size of the send
  window and presence in the 'small_win' list is sufficient criteria
  for recognizing congestion.

This solution seems to be a reasonable compromise between 'anycast',
which is normally not waiting for POLLOUT for a specific destination,
and the other three send modes, which are.

Acked-by: Ying Xue <ying.xue@...driver.com>
Signed-off-by: Jon Maloy <jon.maloy@...csson.com>
---
 net/tipc/group.c  | 64 +++++++++++++++++++++++++++++++------------------------
 net/tipc/group.h  |  2 +-
 net/tipc/socket.c |  5 ++---
 3 files changed, 39 insertions(+), 32 deletions(-)

diff --git a/net/tipc/group.c b/net/tipc/group.c
index 1908773..497ee34 100644
--- a/net/tipc/group.c
+++ b/net/tipc/group.c
@@ -74,7 +74,6 @@ struct tipc_member {
 	u16 bc_rcv_nxt;
 	u16 bc_syncpt;
 	u16 bc_acked;
-	bool usr_pending;
 };
 
 struct tipc_group {
@@ -96,11 +95,27 @@ struct tipc_group {
 	u16 bc_ackers;
 	bool loopback;
 	bool events;
+	bool open;
 };
 
 static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
 				  int mtyp, struct sk_buff_head *xmitq);
 
+bool tipc_group_is_open(struct tipc_group *grp)
+{
+	return grp->open;
+}
+
+static void tipc_group_open(struct tipc_member *m, bool *wakeup)
+{
+	*wakeup = false;
+	if (list_empty(&m->small_win))
+		return;
+	list_del_init(&m->small_win);
+	m->group->open = true;
+	*wakeup = true;
+}
+
 static void tipc_group_decr_active(struct tipc_group *grp,
 				   struct tipc_member *m)
 {
@@ -406,20 +421,20 @@ bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport,
 	int adv, state;
 
 	m = tipc_group_find_dest(grp, dnode, dport);
-	*mbr = m;
-	if (!m)
+	if (!tipc_group_is_receiver(m)) {
+		*mbr = NULL;
 		return false;
-	if (m->usr_pending)
-		return true;
+	}
+	*mbr = m;
+
 	if (m->window >= len)
 		return false;
-	m->usr_pending = true;
+
+	grp->open = false;
 
 	/* If not fully advertised, do it now to prevent mutual blocking */
 	adv = m->advertised;
 	state = m->state;
-	if (state < MBR_JOINED)
-		return true;
 	if (state == MBR_JOINED && adv == ADV_IDLE)
 		return true;
 	if (state == MBR_ACTIVE && adv == ADV_ACTIVE)
@@ -437,9 +452,10 @@ bool tipc_group_bc_cong(struct tipc_group *grp, int len)
 	struct tipc_member *m = NULL;
 
 	/* If prev bcast was replicast, reject until all receivers have acked */
-	if (grp->bc_ackers)
+	if (grp->bc_ackers) {
+		grp->open = false;
 		return true;
-
+	}
 	if (list_empty(&grp->small_win))
 		return false;
 
@@ -754,9 +770,7 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
 
 		/* Member can be taken into service */
 		m->state = MBR_JOINED;
-		*usr_wakeup = true;
-		m->usr_pending = false;
-		list_del_init(&m->small_win);
+		tipc_group_open(m, usr_wakeup);
 		tipc_group_update_member(m, 0);
 		tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
 		tipc_group_create_event(grp, m, TIPC_PUBLISHED,
@@ -767,8 +781,7 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
 			return;
 		m->bc_syncpt = msg_grp_bc_syncpt(hdr);
 		list_del_init(&m->list);
-		list_del_init(&m->small_win);
-		*usr_wakeup = true;
+		tipc_group_open(m, usr_wakeup);
 		tipc_group_decr_active(grp, m);
 		m->state = MBR_LEAVING;
 		tipc_group_create_event(grp, m, TIPC_WITHDRAWN,
@@ -778,26 +791,25 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
 		if (!m)
 			return;
 		m->window += msg_adv_win(hdr);
-		*usr_wakeup = m->usr_pending;
-		m->usr_pending = false;
-		list_del_init(&m->small_win);
+		tipc_group_open(m, usr_wakeup);
 		return;
 	case GRP_ACK_MSG:
 		if (!m)
 			return;
 		m->bc_acked = msg_grp_bc_acked(hdr);
 		if (--grp->bc_ackers)
-			break;
+			return;
+		list_del_init(&m->small_win);
+		m->group->open = true;
 		*usr_wakeup = true;
-		m->usr_pending = false;
+		tipc_group_update_member(m, 0);
 		return;
 	case GRP_RECLAIM_MSG:
 		if (!m)
 			return;
-		*usr_wakeup = m->usr_pending;
-		m->usr_pending = false;
 		tipc_group_proto_xmit(grp, m, GRP_REMIT_MSG, xmitq);
 		m->window = ADV_IDLE;
+		tipc_group_open(m, usr_wakeup);
 		return;
 	case GRP_REMIT_MSG:
 		if (!m || m->state != MBR_RECLAIMING)
@@ -883,9 +895,7 @@ void tipc_group_member_evt(struct tipc_group *grp,
 		/* Member can be taken into service */
 		m->instance = instance;
 		m->state = MBR_JOINED;
-		*usr_wakeup = true;
-		m->usr_pending = false;
-		list_del_init(&m->small_win);
+		tipc_group_open(m, usr_wakeup);
 		tipc_group_update_member(m, 0);
 		tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, xmitq);
 		tipc_group_create_event(grp, m, TIPC_PUBLISHED,
@@ -895,12 +905,10 @@ void tipc_group_member_evt(struct tipc_group *grp,
 		if (!m)
 			break;
 
-		*usr_wakeup = true;
-		m->usr_pending = false;
 		tipc_group_decr_active(grp, m);
 		m->state = MBR_LEAVING;
 		list_del_init(&m->list);
-		list_del_init(&m->small_win);
+		tipc_group_open(m, usr_wakeup);
 
 		/* Only send event if no LEAVE message can be expected */
 		if (!tipc_node_is_up(net, node))
diff --git a/net/tipc/group.h b/net/tipc/group.h
index dee7947..f4a596e 100644
--- a/net/tipc/group.h
+++ b/net/tipc/group.h
@@ -67,9 +67,9 @@ void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack);
 bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport,
 		     int len, struct tipc_member **m);
 bool tipc_group_bc_cong(struct tipc_group *grp, int len);
+bool tipc_group_is_open(struct tipc_group *grp);
 void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
 			       u32 port, struct sk_buff_head *xmitq);
 u16 tipc_group_bc_snd_nxt(struct tipc_group *grp);
 void tipc_group_update_member(struct tipc_member *m, int len);
-int tipc_group_size(struct tipc_group *grp);
 #endif
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index b24dab3..4660a5b 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -736,9 +736,8 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock,
 			revents |= POLLIN | POLLRDNORM;
 		break;
 	case TIPC_OPEN:
-		if (!grp || tipc_group_size(grp))
-			if (!tsk->cong_link_cnt)
-				revents |= POLLOUT;
+		if ((!grp || tipc_group_is_open(grp)) && !tsk->cong_link_cnt)
+			revents |= POLLOUT;
 		if (!tipc_sk_type_connectionless(sk))
 			break;
 		if (skb_queue_empty(&sk->sk_receive_queue))
-- 
2.1.4

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ