2.6.27-stable review patch. If anyone has any objections, please let us know. ------------------ From: Trond Myklebust This fixes a problem that was reported as Red Hat Bugzilla entry number 485339, in which rpciod starts looping on the TCP connection code, rendering the NFS client unusable for 1/2 minute or so. It is basically a backport of commit f75e6745aa3084124ae1434fd7629853bdaf6798 (SUNRPC: Fix the problem of EADDRNOTAVAIL syslog floods on reconnect) Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- include/linux/sunrpc/xprt.h | 1 + net/sunrpc/xprt.c | 6 ++---- net/sunrpc/xprtsock.c | 37 ++++++++++++++++++++++++++++++++++--- 3 files changed, 37 insertions(+), 7 deletions(-) --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -260,6 +260,7 @@ void xprt_conditional_disconnect(struc #define XPRT_BOUND (4) #define XPRT_BINDING (5) #define XPRT_CLOSING (6) +#define XPRT_CONNECTION_CLOSE (8) static inline void xprt_set_connected(struct rpc_xprt *xprt) { --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -645,10 +645,8 @@ xprt_init_autodisconnect(unsigned long d if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) goto out_abort; spin_unlock(&xprt->transport_lock); - if (xprt_connecting(xprt)) - xprt_release_write(xprt, NULL); - else - queue_work(rpciod_workqueue, &xprt->task_cleanup); + set_bit(XPRT_CONNECTION_CLOSE, &xprt->state); + queue_work(rpciod_workqueue, &xprt->task_cleanup); return; out_abort: spin_unlock(&xprt->transport_lock); --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -748,6 +748,9 @@ out_release: * * This is used when all requests are complete; ie, no DRC state remains * on the server we want to save. + * + * The caller _must_ be holding XPRT_LOCKED in order to avoid issues with + * xs_reset_transport() zeroing the socket from underneath a writer. */ static void xs_close(struct rpc_xprt *xprt) { @@ -781,6 +784,14 @@ clear_close_wait: xprt_disconnect_done(xprt); } +static void xs_tcp_close(struct rpc_xprt *xprt) +{ + if (test_and_clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state)) + xs_close(xprt); + else + xs_tcp_shutdown(xprt); +} + /** * xs_destroy - prepare to shutdown a transport * @xprt: doomed transport @@ -1676,11 +1687,21 @@ static void xs_tcp_connect_worker4(struc goto out_clear; case -ECONNREFUSED: case -ECONNRESET: + case -ENETUNREACH: /* retry with existing socket, after a delay */ - break; + goto out_clear; default: /* get rid of existing socket, and retry */ xs_tcp_shutdown(xprt); + printk("%s: connect returned unhandled error %d\n", + __func__, status); + case -EADDRNOTAVAIL: + /* We're probably in TIME_WAIT. Get rid of existing socket, + * and retry + */ + set_bit(XPRT_CONNECTION_CLOSE, &xprt->state); + xprt_force_disconnect(xprt); + status = -EAGAIN; } } out: @@ -1735,11 +1756,21 @@ static void xs_tcp_connect_worker6(struc goto out_clear; case -ECONNREFUSED: case -ECONNRESET: + case -ENETUNREACH: /* retry with existing socket, after a delay */ - break; + goto out_clear; default: /* get rid of existing socket, and retry */ xs_tcp_shutdown(xprt); + printk("%s: connect returned unhandled error %d\n", + __func__, status); + case -EADDRNOTAVAIL: + /* We're probably in TIME_WAIT. Get rid of existing socket, + * and retry + */ + set_bit(XPRT_CONNECTION_CLOSE, &xprt->state); + xprt_force_disconnect(xprt); + status = -EAGAIN; } } out: @@ -1871,7 +1902,7 @@ static struct rpc_xprt_ops xs_tcp_ops = .buf_free = rpc_free, .send_request = xs_tcp_send_request, .set_retrans_timeout = xprt_set_retrans_timeout_def, - .close = xs_tcp_shutdown, + .close = xs_tcp_close, .destroy = xs_destroy, .print_stats = xs_tcp_print_stats, }; -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/