lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Tue, 15 Mar 2016 10:47:12 -0700
From:	Tom Herbert <tom@...bertland.com>
To:	Sowmini Varadhan <sowmini.varadhan@...cle.com>
Cc:	Linux Kernel Network Developers <netdev@...r.kernel.org>,
	santosh.shilimkar@...cle.com,
	"David S. Miller" <davem@...emloft.net>
Subject: Re: [PATCH v2 net-next] rds-tcp: Add sysctl tunables for
 sndbuf/rcvbuf on rds-tcp socket

On Tue, Mar 15, 2016 at 10:34 AM, Sowmini Varadhan
<sowmini.varadhan@...cle.com> wrote:
> On (03/15/16 10:30), Tom Herbert wrote:
>> I still don't understand why you won't consider using netlink. sndbuf
>> and rcvbuf seem like just two of many arbitrary parameters that a user
>> might want to configure.
>
>> I would think that being able to configure
>> the listener port (currently a fixed value), an address to bind to
>
> the listener port is properly registered with IANA. I dont think
> we will need to change that, any more than ssh/nfs ports will need
> to change.
>
Both sshd and nfsd and allow configurable listener port numbers. Any
listener service will allow a configurable port number. An IANA port
number is good as a default, but there are many reasons why people
want or need to use a different port number. I don't see what makes
RDS special in this regard.

>> (instead of always inaddr-any), or not automatically creating an RDS
>> listener in every namespace are at least as important configuration
>> parameters as these two.
>
> Those parameters can be added via sysctl if needed- we have not
> seen the need for that yet.
>
> And if something comes up that needs the netlink etc. we can
> add it down the road. It's just not needed now.
>
> Thanks
> --Sowmini
>>
>> Tom
>>
>> >  net/rds/tcp.c |  139 ++++++++++++++++++++++++++++++++++++++++++++++++++++----
>> >  1 files changed, 129 insertions(+), 10 deletions(-)
>> >
>> > diff --git a/net/rds/tcp.c b/net/rds/tcp.c
>> > index ad60299..5d62fd2 100644
>> > --- a/net/rds/tcp.c
>> > +++ b/net/rds/tcp.c
>> > @@ -52,7 +52,27 @@ static LIST_HEAD(rds_tcp_conn_list);
>> >
>> >  static struct kmem_cache *rds_tcp_conn_slab;
>> >
>> > -#define RDS_TCP_DEFAULT_BUFSIZE (128 * 1024)
>> > +static int sndbuf_handler(struct ctl_table *ctl, int write,
>> > +                         void __user *buffer, size_t *lenp, loff_t *fpos);
>> > +static int rcvbuf_handler(struct ctl_table *ctl, int write,
>> > +                         void __user *buffer, size_t *lenp, loff_t *fpos);
>> > +static struct ctl_table rds_tcp_sysctl_table[] = {
>> > +       {
>> > +               .procname       = "rds_tcp_sndbuf",
>> > +               /* data is per-net pointer */
>> > +               .maxlen         = sizeof(int),
>> > +               .mode           = 0644,
>> > +               .proc_handler   = sndbuf_handler,
>> > +       },
>> > +       {
>> > +               .procname       = "rds_tcp_rcvbuf",
>> > +               /* data is per-net pointer */
>> > +               .maxlen         = sizeof(int),
>> > +               .mode           = 0644,
>> > +               .proc_handler   = rcvbuf_handler,
>> > +       },
>> > +       { }
>> > +};
>> >
>> >  /* doing it this way avoids calling tcp_sk() */
>> >  void rds_tcp_nonagle(struct socket *sock)
>> > @@ -66,15 +86,6 @@ void rds_tcp_nonagle(struct socket *sock)
>> >         set_fs(oldfs);
>> >  }
>> >
>> > -/* All module specific customizations to the RDS-TCP socket should be done in
>> > - * rds_tcp_tune() and applied after socket creation. In general these
>> > - * customizations should be tunable via module_param()
>> > - */
>> > -void rds_tcp_tune(struct socket *sock)
>> > -{
>> > -       rds_tcp_nonagle(sock);
>> > -}
>> > -
>> >  u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc)
>> >  {
>> >         return tcp_sk(tc->t_sock->sk)->snd_nxt;
>> > @@ -272,8 +283,33 @@ static int rds_tcp_netid;
>> >  struct rds_tcp_net {
>> >         struct socket *rds_tcp_listen_sock;
>> >         struct work_struct rds_tcp_accept_w;
>> > +       struct ctl_table_header *rds_tcp_sysctl;
>> > +       int sndbuf_size;
>> > +       int rcvbuf_size;
>> >  };
>> >
>> > +/* All module specific customizations to the RDS-TCP socket should be done in
>> > + * rds_tcp_tune() and applied after socket creation.
>> > + */
>> > +void rds_tcp_tune(struct socket *sock)
>> > +{
>> > +       struct sock *sk = sock->sk;
>> > +       struct net *net = sock_net(sk);
>> > +       struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
>> > +
>> > +       rds_tcp_nonagle(sock);
>> > +       lock_sock(sk);
>> > +       if (rtn->sndbuf_size > 0) {
>> > +               sk->sk_sndbuf = rtn->sndbuf_size;
>> > +               sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
>> > +       }
>> > +       if (rtn->rcvbuf_size > 0) {
>> > +               sk->sk_rcvbuf = rtn->rcvbuf_size;
>> > +               sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
>> > +       }
>> > +       release_sock(sk);
>> > +}
>> > +
>> >  static void rds_tcp_accept_worker(struct work_struct *work)
>> >  {
>> >         struct rds_tcp_net *rtn = container_of(work,
>> > @@ -296,6 +332,13 @@ static __net_init int rds_tcp_init_net(struct net *net)
>> >  {
>> >         struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
>> >
>> > +       /* 0 value for {snd, rcv}buf_size implies we let the stack
>> > +        * pick the default, and permit auto-tuning of buffer size.
>> > +        */
>> > +       rtn->sndbuf_size = 0;
>> > +       rtn->rcvbuf_size = 0;
>> > +       rtn->rds_tcp_sysctl = register_net_sysctl(net, "net/rds/tcp",
>> > +                                                 rds_tcp_sysctl_table);
>> >         rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net);
>> >         if (!rtn->rds_tcp_listen_sock) {
>> >                 pr_warn("could not set up listen sock\n");
>> > @@ -309,6 +352,7 @@ static void __net_exit rds_tcp_exit_net(struct net *net)
>> >  {
>> >         struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
>> >
>> > +       unregister_net_sysctl_table(rtn->rds_tcp_sysctl);
>> >         /* If rds_tcp_exit_net() is called as a result of netns deletion,
>> >          * the rds_tcp_kill_sock() device notifier would already have cleaned
>> >          * up the listen socket, thus there is no work to do in this function.
>> > @@ -383,6 +427,81 @@ static struct notifier_block rds_tcp_dev_notifier = {
>> >         .priority = -10, /* must be called after other network notifiers */
>> >  };
>> >
>> > +static int user_atoi(char __user *ubuf, size_t len)
>> > +{
>> > +       char buf[16];
>> > +       unsigned long ret;
>> > +       int err;
>> > +
>> > +       if (len > 15)
>> > +               return -EINVAL;
>> > +
>> > +       if (copy_from_user(buf, ubuf, len))
>> > +               return -EFAULT;
>> > +
>> > +       buf[len] = 0;
>> > +       err =  kstrtoul(buf, 0, &ret);
>> > +       if (err != 0)
>> > +               return -ERANGE;
>> > +       return  ret;
>> > +}
>> > +
>> > +/* when sysctl is used to modify some kernel socket parameters,this
>> > + * function  resets the RDS connections in that netns  so that we can
>> > + * restart with new parameters.  The assumption is that such reset
>> > + * events are few and far-between.
>> > + */
>> > +static void rds_tcp_sysctl_reset(struct net *net)
>> > +{
>> > +       struct rds_tcp_connection *tc, *_tc;
>> > +
>> > +       spin_lock_irq(&rds_tcp_conn_lock);
>> > +       list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
>> > +               struct net *c_net = read_pnet(&tc->conn->c_net);
>> > +
>> > +               if (net != c_net || !tc->t_sock)
>> > +                       continue;
>> > +               rds_conn_drop(tc->conn); /* reconnect with new parameters */
>> > +       }
>> > +       spin_unlock_irq(&rds_tcp_conn_lock);
>> > +}
>> > +
>> > +static int sndbuf_handler(struct ctl_table *ctl, int write,
>> > +                         void __user *buffer, size_t *lenp, loff_t *fpos)
>> > +{
>> > +       struct net *net = current->nsproxy->net_ns;
>> > +       struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
>> > +       struct ctl_table tmp;
>> > +
>> > +       tmp = *ctl;
>> > +       tmp.data = &rtn->sndbuf_size;
>> > +       if (!write)
>> > +               return proc_dointvec(&tmp, write, buffer, lenp, fpos);
>> > +
>> > +       rtn->sndbuf_size = user_atoi(buffer, *lenp);
>> > +       rds_tcp_sysctl_reset(net);
>> > +
>> > +       return 0;
>> > +}
>> > +
>> > +static int rcvbuf_handler(struct ctl_table *ctl, int write,
>> > +                         void __user *buffer, size_t *lenp, loff_t *fpos)
>> > +{
>> > +       struct net *net = current->nsproxy->net_ns;
>> > +       struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
>> > +       struct ctl_table tmp;
>> > +
>> > +       tmp = *ctl;
>> > +       tmp.data = &rtn->rcvbuf_size;
>> > +       if (!write)
>> > +               return proc_dointvec(&tmp, write, buffer, lenp, fpos);
>> > +
>> > +       rtn->rcvbuf_size = user_atoi(buffer, *lenp);
>> > +       rds_tcp_sysctl_reset(net);
>> > +
>> > +       return 0;
>> > +}
>> > +
>> >  static void rds_tcp_exit(void)
>> >  {
>> >         rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
>> > --
>> > 1.7.1
>> >

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ