[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1529431217-5264-5-git-send-email-tushar.n.dave@oracle.com>
Date: Tue, 19 Jun 2018 20:00:17 +0200
From: Tushar Dave <tushar.n.dave@...cle.com>
To: ast@...nel.org, daniel@...earbox.net, davem@...emloft.net,
jakub.kicinski@...ronome.com, quentin.monnet@...ronome.com,
jiong.wang@...ronome.com, guro@...com, sandipan@...ux.vnet.ibm.com,
john.fastabend@...il.com, kafai@...com, rdna@...com, brakmo@...com,
netdev@...r.kernel.org, acme@...hat.com,
sowmini.varadhan@...cle.com
Subject: [RFC v2 PATCH 4/4] rds: invoke socket sg filter attached to rds socket
The RDS module sits on top of TCP (rds_tcp) and IB (rds_rdma), so messages
arrive in the form of skbs (over TCP) and scatterlists (over IB/RDMA).
However, because socket filters only deal with skbs (i.e. struct sk_buff
as the bpf context), we can only use a socket filter for rds_tcp and not
for rds_rdma.
Considering a single filtering solution for RDS, it seems that the common
denominator between sk_buff and scatterlist is the scatterlist. Therefore,
this patch converts the skb to an sgvec and invokes sg_filter_run for
rds_tcp, and simply invokes sg_filter_run for IB/rds_rdma.
Signed-off-by: Tushar Dave <tushar.n.dave@...cle.com>
Reviewed-by: Sowmini Varadhan <sowmini.varadhan@...cle.com>
---
net/rds/ib.c | 1 +
net/rds/ib.h | 1 +
net/rds/ib_recv.c | 12 ++++++++++++
net/rds/rds.h | 2 ++
net/rds/recv.c | 16 ++++++++++++++++
net/rds/tcp.c | 2 ++
net/rds/tcp.h | 2 ++
net/rds/tcp_recv.c | 38 ++++++++++++++++++++++++++++++++++++++
8 files changed, 74 insertions(+)
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 02deee2..3027832 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -421,6 +421,7 @@ struct rds_transport rds_ib_transport = {
.conn_path_shutdown = rds_ib_conn_path_shutdown,
.inc_copy_to_user = rds_ib_inc_copy_to_user,
.inc_free = rds_ib_inc_free,
+ .inc_to_sg_get = rds_ib_inc_to_sg_get,
.cm_initiate_connect = rds_ib_cm_initiate_connect,
.cm_handle_connect = rds_ib_cm_handle_connect,
.cm_connect_complete = rds_ib_cm_connect_complete,
diff --git a/net/rds/ib.h b/net/rds/ib.h
index a6f4d7d..699b5b9b 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -375,6 +375,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn,
void rds_ib_recv_free_caches(struct rds_ib_connection *ic);
void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp);
void rds_ib_inc_free(struct rds_incoming *inc);
+int rds_ib_inc_to_sg_get(struct rds_incoming *inc, struct scatterlist **sg);
int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to);
void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc,
struct rds_ib_ack_state *state);
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index b4e421a..62be497 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -219,6 +219,18 @@ void rds_ib_inc_free(struct rds_incoming *inc)
rds_ib_recv_cache_put(&ibinc->ii_cache_entry, &ic->i_cache_incs);
}
+/*
+ * Hand the socket filter path a scatterlist for an IB incoming message.
+ *
+ * @inc: incoming message embedded in a struct rds_ib_incoming (ii_inc)
+ * @sg:  set to the address of the f_sg member of the first entry on the
+ *       incoming's ii_frags list
+ *
+ * Nothing is allocated here; the returned pointer refers to memory owned
+ * by the fragment itself.
+ * NOTE(review): ii_frags is assumed to be non-empty when this is called -
+ * confirm against the receive path.
+ *
+ * Always returns 0.
+ */
+int rds_ib_inc_to_sg_get(struct rds_incoming *inc, struct scatterlist **sg)
+{
+	struct rds_ib_incoming *ibinc =
+		container_of(inc, struct rds_ib_incoming, ii_inc);
+	struct rds_page_frag *first_frag =
+		list_first_entry(&ibinc->ii_frags, struct rds_page_frag, f_item);
+
+	*sg = &first_frag->f_sg;
+	return 0;
+}
+
static void rds_ib_recv_clear_one(struct rds_ib_connection *ic,
struct rds_ib_recv_work *recv)
{
diff --git a/net/rds/rds.h b/net/rds/rds.h
index b04c333..f5ea833 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -528,6 +528,8 @@ struct rds_transport {
int (*recv_path)(struct rds_conn_path *cp);
int (*inc_copy_to_user)(struct rds_incoming *inc, struct iov_iter *to);
void (*inc_free)(struct rds_incoming *inc);
+ int (*inc_to_sg_get)(struct rds_incoming *inc, struct scatterlist **sg);
+ void (*inc_to_sg_put)(struct scatterlist **sg);
int (*cm_handle_connect)(struct rdma_cm_id *cm_id,
struct rdma_cm_event *event);
diff --git a/net/rds/recv.c b/net/rds/recv.c
index dc67458..e0c5b4c 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -286,6 +286,7 @@ void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
struct sock *sk;
unsigned long flags;
struct rds_conn_path *cp;
+ struct sk_filter *filter;
inc->i_conn = conn;
inc->i_rx_jiffies = jiffies;
@@ -369,6 +370,21 @@ void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
/* We can be racing with rds_release() which marks the socket dead. */
sk = rds_rs_to_sk(rs);
+ rcu_read_lock();
+ filter = rcu_dereference(sk->sk_filter);
+ if (filter) {
+ if (conn->c_trans->inc_to_sg_get) {
+ struct scatterlist *sg;
+
+ if (conn->c_trans->inc_to_sg_get(inc, &sg) == 0) {
+ sg_filter_run(sk, sg);
+ if (conn->c_trans->inc_to_sg_put)
+ conn->c_trans->inc_to_sg_put(&sg);
+ }
+ }
+ }
+ rcu_read_unlock();
+
/* serialize with rds_release -> sock_orphan */
write_lock_irqsave(&rs->rs_recv_lock, flags);
if (!sock_flag(sk, SOCK_DEAD)) {
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 351a284..b431854 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -376,6 +376,8 @@ struct rds_transport rds_tcp_transport = {
.conn_path_shutdown = rds_tcp_conn_path_shutdown,
.inc_copy_to_user = rds_tcp_inc_copy_to_user,
.inc_free = rds_tcp_inc_free,
+ .inc_to_sg_get = rds_tcp_inc_to_sg_get,
+ .inc_to_sg_put = rds_tcp_inc_to_sg_put,
.stats_info_copy = rds_tcp_stats_info_copy,
.exit = rds_tcp_exit,
.t_owner = THIS_MODULE,
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index c6fa080..466bdb9 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -82,6 +82,8 @@ void rds_tcp_restore_callbacks(struct socket *sock,
int rds_tcp_recv_path(struct rds_conn_path *cp);
void rds_tcp_inc_free(struct rds_incoming *inc);
int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to);
+int rds_tcp_inc_to_sg_get(struct rds_incoming *inc, struct scatterlist **sg);
+void rds_tcp_inc_to_sg_put(struct scatterlist **sg);
/* tcp_send.c */
void rds_tcp_xmit_path_prepare(struct rds_conn_path *cp);
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c
index b9fbd2e..ce62712 100644
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -56,6 +56,44 @@ void rds_tcp_inc_free(struct rds_incoming *inc)
kmem_cache_free(rds_tcp_incoming_slab, tinc);
}
+/* Upper bound on scatterlist entries built for one incoming message.
+ * NOTE(review): presumably chosen to cover one skb's head plus its page
+ * frags - confirm, and see the sizing note in rds_tcp_inc_to_sg_get().
+ */
+#define MAX_SG 17
+/*
+ * rds_tcp_inc_to_sg_get - build a scatterlist covering an incoming's skbs
+ * @inc: incoming message embedded in a struct rds_tcp_incoming (ti_inc)
+ * @sg:  on success, set to a newly allocated scatterlist table
+ *
+ * Maps every skb queued on ti_skb_list into a single table of at most
+ * MAX_SG entries and marks the last used entry as the end of the list.
+ * The table must be released with rds_tcp_inc_to_sg_put().
+ *
+ * rds_recv_incoming() calls this between rcu_read_lock() and
+ * rcu_read_unlock(), so the allocation must not sleep: GFP_ATOMIC.
+ *
+ * Returns 0 on success.  On failure *sg is left untouched and a negative
+ * errno is returned: -ENOMEM if the table cannot be allocated, -EMSGSIZE
+ * if the skbs need more than MAX_SG entries, -EINVAL if there was nothing
+ * to map, or the error reported by skb_to_sgvec_nomark().
+ */
+int rds_tcp_inc_to_sg_get(struct rds_incoming *inc, struct scatterlist **sg)
+{
+	struct scatterlist *sg_list;
+	struct rds_tcp_incoming *tinc;
+	struct sk_buff *skb;
+	int num_sg = 0;
+	int ret;
+
+	tinc = container_of(inc, struct rds_tcp_incoming, ti_inc);
+
+	/* For now we are assuming that the max sg elements we need is MAX_SG.
+	 * To determine actual number of sg elements we need to traverse the
+	 * skb queue e.g.
+	 *
+	 * skb_queue_walk(&tinc->ti_skb_list, skb) {
+	 *	num_sg += skb_shinfo(skb)->nr_frags + 1;
+	 * }
+	 */
+	sg_list = kcalloc(MAX_SG, sizeof(*sg_list), GFP_ATOMIC);
+	if (!sg_list)
+		return -ENOMEM;
+
+	sg_init_table(sg_list, MAX_SG);
+	skb_queue_walk(&tinc->ti_skb_list, skb) {
+		/* No free entry left: the next mapping would start past
+		 * the end of sg_list.
+		 */
+		if (num_sg >= MAX_SG) {
+			ret = -EMSGSIZE;
+			goto err_free;
+		}
+
+		/* A negative return is an error, not an entry count; it
+		 * must not be folded into num_sg.
+		 */
+		ret = skb_to_sgvec_nomark(skb, &sg_list[num_sg], 0, skb->len);
+		if (ret < 0)
+			goto err_free;
+		num_sg += ret;
+	}
+
+	/* Nothing mapped (e.g. empty queue): sg_list[num_sg - 1] would be
+	 * out of bounds, so there is no valid list to hand back.
+	 */
+	if (!num_sg) {
+		ret = -EINVAL;
+		goto err_free;
+	}
+
+	sg_mark_end(&sg_list[num_sg - 1]);
+	*sg = sg_list;
+
+	return 0;
+
+err_free:
+	kfree(sg_list);
+	return ret;
+}
+
+/*
+ * Release a scatterlist table obtained from rds_tcp_inc_to_sg_get().
+ *
+ * @sg: address of the table pointer; the table it points to is freed
+ *      with kfree().  The caller's pointer itself is not modified.
+ */
+void rds_tcp_inc_to_sg_put(struct scatterlist **sg)
+{
+	struct scatterlist *sg_list = *sg;
+
+	kfree(sg_list);
+}
+
/*
* this is pretty lame, but, whatever.
*/
--
1.8.3.1
Powered by blists - more mailing lists