Message-Id: <20100427.145904.67907652.davem@davemloft.net>
Date: Tue, 27 Apr 2010 14:59:04 -0700 (PDT)
From: David Miller <davem@...emloft.net>
To: eric.dumazet@...il.com
Cc: therbert@...gle.com, netdev@...r.kernel.org
Subject: Re: [PATCH v5] rfs: Receive Flow Steering
From: Eric Dumazet <eric.dumazet@...il.com>
Date: Mon, 26 Apr 2010 10:41:11 +0200
> On Monday 19 April 2010 at 14:19 -0700, David Miller wrote:
>
>>
>> I was also thinking about how we could compute rxhash in the
>> loopback driver :-)
>
> This would be easy if rxhash were not a "struct inet_sock" field but a
> "struct sock" one
>
> In sock_alloc_send_pskb() (or skb_set_owner_w()):
>
> skb->rxhash = sk->rxhash;
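
A minimal sketch of that suggestion, assuming the hash moves to struct sock
and is renamed sk_rxhash as in the patch below (the helper name here is made
up purely for illustration; the real change would just add the assignment to
sock_alloc_send_pskb() or skb_set_owner_w()):

	/* Stamp an outgoing skb with the owning socket's flow hash so that
	 * loopback RX sees a usable skb->rxhash without recomputing it.
	 */
	static inline void skb_copy_tx_rxhash(struct sk_buff *skb,
					      const struct sock *sk)
	{
#ifdef CONFIG_RPS
		skb->rxhash = sk->sk_rxhash;
#endif
	}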
Agreed. I'll commit the following to net-next-2.6 after some build
testing.
net: Make RFS socket operations not be inet specific.
Idea from Eric Dumazet.
As for placement inside struct sock, I tried to choose a place that
otherwise has a 32-bit hole on 64-bit systems.
Signed-off-by: David S. Miller <davem@...emloft.net>
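
To illustrate the hole-filling remark above (a simplified example, not the
actual struct sock layout): on a 64-bit build, a 4-byte member followed by an
8-byte-aligned one leaves 4 bytes of padding, which a __u32 can occupy at no
size cost; that is why sk_rxhash lands between the int sk_rcvlowat and the
unsigned long sk_flags in the hunk below:

	struct example {
		int		a;	/* 4 bytes */
#ifdef CONFIG_RPS
		__u32		hash;	/* fills what would otherwise be padding */
#endif
		unsigned long	b;	/* 8-byte aligned on 64-bit */
	};				/* 16 bytes either way on LP64 */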
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index c1d4295..1653de5 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -102,7 +102,6 @@ struct rtable;
* @uc_ttl - Unicast TTL
* @inet_sport - Source port
* @inet_id - ID counter for DF pkts
- * @rxhash - flow hash received from netif layer
* @tos - TOS
* @mc_ttl - Multicasting TTL
* @is_icsk - is this an inet_connection_sock?
@@ -126,9 +125,6 @@ struct inet_sock {
__u16 cmsg_flags;
__be16 inet_sport;
__u16 inet_id;
-#ifdef CONFIG_RPS
- __u32 rxhash;
-#endif
struct ip_options *opt;
__u8 tos;
@@ -224,37 +220,4 @@ static inline __u8 inet_sk_flowi_flags(const struct sock *sk)
return inet_sk(sk)->transparent ? FLOWI_FLAG_ANYSRC : 0;
}
-static inline void inet_rps_record_flow(const struct sock *sk)
-{
-#ifdef CONFIG_RPS
- struct rps_sock_flow_table *sock_flow_table;
-
- rcu_read_lock();
- sock_flow_table = rcu_dereference(rps_sock_flow_table);
- rps_record_sock_flow(sock_flow_table, inet_sk(sk)->rxhash);
- rcu_read_unlock();
-#endif
-}
-
-static inline void inet_rps_reset_flow(const struct sock *sk)
-{
-#ifdef CONFIG_RPS
- struct rps_sock_flow_table *sock_flow_table;
-
- rcu_read_lock();
- sock_flow_table = rcu_dereference(rps_sock_flow_table);
- rps_reset_sock_flow(sock_flow_table, inet_sk(sk)->rxhash);
- rcu_read_unlock();
-#endif
-}
-
-static inline void inet_rps_save_rxhash(struct sock *sk, u32 rxhash)
-{
-#ifdef CONFIG_RPS
- if (unlikely(inet_sk(sk)->rxhash != rxhash)) {
- inet_rps_reset_flow(sk);
- inet_sk(sk)->rxhash = rxhash;
- }
-#endif
-}
#endif /* _INET_SOCK_H */
diff --git a/include/net/sock.h b/include/net/sock.h
index ef2f875..cf12b1e 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -198,6 +198,7 @@ struct sock_common {
* @sk_rcvlowat: %SO_RCVLOWAT setting
* @sk_rcvtimeo: %SO_RCVTIMEO setting
* @sk_sndtimeo: %SO_SNDTIMEO setting
+ * @sk_rxhash: flow hash received from netif layer
* @sk_filter: socket filtering instructions
* @sk_protinfo: private area, net family specific, when not using slab
* @sk_timer: sock cleanup timer
@@ -278,6 +279,9 @@ struct sock {
int sk_gso_type;
unsigned int sk_gso_max_size;
int sk_rcvlowat;
+#ifdef CONFIG_RPS
+ __u32 sk_rxhash;
+#endif
unsigned long sk_flags;
unsigned long sk_lingertime;
struct sk_buff_head sk_error_queue;
@@ -629,6 +633,40 @@ static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
return sk->sk_backlog_rcv(sk, skb);
}
+static inline void sock_rps_record_flow(const struct sock *sk)
+{
+#ifdef CONFIG_RPS
+ struct rps_sock_flow_table *sock_flow_table;
+
+ rcu_read_lock();
+ sock_flow_table = rcu_dereference(rps_sock_flow_table);
+ rps_record_sock_flow(sock_flow_table, sk->sk_rxhash);
+ rcu_read_unlock();
+#endif
+}
+
+static inline void sock_rps_reset_flow(const struct sock *sk)
+{
+#ifdef CONFIG_RPS
+ struct rps_sock_flow_table *sock_flow_table;
+
+ rcu_read_lock();
+ sock_flow_table = rcu_dereference(rps_sock_flow_table);
+ rps_reset_sock_flow(sock_flow_table, sk->sk_rxhash);
+ rcu_read_unlock();
+#endif
+}
+
+static inline void sock_rps_save_rxhash(struct sock *sk, u32 rxhash)
+{
+#ifdef CONFIG_RPS
+ if (unlikely(sk->sk_rxhash != rxhash)) {
+ sock_rps_reset_flow(sk);
+ sk->sk_rxhash = rxhash;
+ }
+#endif
+}
+
#define sk_wait_event(__sk, __timeo, __condition) \
({ int __rc; \
release_sock(__sk); \
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 9f52880..c6c43bc 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -419,7 +419,7 @@ int inet_release(struct socket *sock)
if (sk) {
long timeout;
- inet_rps_reset_flow(sk);
+ sock_rps_reset_flow(sk);
/* Applications forget to leave groups before exiting */
ip_mc_drop_socket(sk);
@@ -722,7 +722,7 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
{
struct sock *sk = sock->sk;
- inet_rps_record_flow(sk);
+ sock_rps_record_flow(sk);
/* We may need to bind the socket. */
if (!inet_sk(sk)->inet_num && inet_autobind(sk))
@@ -737,7 +737,7 @@ static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
{
struct sock *sk = sock->sk;
- inet_rps_record_flow(sk);
+ sock_rps_record_flow(sk);
/* We may need to bind the socket. */
if (!inet_sk(sk)->inet_num && inet_autobind(sk))
@@ -755,7 +755,7 @@ int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
int addr_len = 0;
int err;
- inet_rps_record_flow(sk);
+ sock_rps_record_flow(sk);
err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
flags & ~MSG_DONTWAIT, &addr_len);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 4d6717d..771f814 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1672,7 +1672,7 @@ process:
skb->dev = NULL;
- inet_rps_save_rxhash(sk, skb->rxhash);
+ sock_rps_save_rxhash(sk, skb->rxhash);
bh_lock_sock_nested(sk);
ret = 0;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 776c844..63eb56b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1217,7 +1217,7 @@ int udp_disconnect(struct sock *sk, int flags)
sk->sk_state = TCP_CLOSE;
inet->inet_daddr = 0;
inet->inet_dport = 0;
- inet_rps_save_rxhash(sk, 0);
+ sock_rps_save_rxhash(sk, 0);
sk->sk_bound_dev_if = 0;
if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
inet_reset_saddr(sk);
@@ -1262,7 +1262,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
int rc;
if (inet_sk(sk)->inet_daddr)
- inet_rps_save_rxhash(sk, skb->rxhash);
+ sock_rps_save_rxhash(sk, skb->rxhash);
rc = sock_queue_rcv_skb(sk, skb);
if (rc < 0) {