[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <664366b3-6a09-7306-c227-3dd071a726ee@cumulusnetworks.com>
Date: Thu, 7 Feb 2019 08:32:48 +0200
From: Nikolay Aleksandrov <nikolay@...ulusnetworks.com>
To: Callum Sinclair <callum.sinclair@...iedtelesis.co.nz>,
davem@...emloft.net, kuznet@....inr.ac.ru, yoshfuji@...ux-ipv6.org,
netdev@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH] ipmr: ip6mr: Create new sockopt to clear mfc cache only
On 07/02/2019 04:08, Callum Sinclair wrote:
> Currently the only way to clear the mfc cache was to delete the entries
> one by one using the MRT_DEL_MFC socket option or to destroy and
> recreate the socket.
>
> Create a new socket option which will clear the multicast forwarding
> cache on the socket without destroying the socket.
>
> Signed-off-by: Callum Sinclair <callum.sinclair@...iedtelesis.co.nz>
> ---
> include/uapi/linux/mroute.h | 7 +++-
> include/uapi/linux/mroute6.h | 7 +++-
> net/ipv4/ipmr.c | 69 +++++++++++++++++++++-------------
> net/ipv6/ip6mr.c | 73 ++++++++++++++++++++++--------------
> 4 files changed, 99 insertions(+), 57 deletions(-)
>
Hi,
Thanks for working on this. I think you missed one comment: this still seems
to clean all tables even though the socket has a table assigned. Could it
act only on that table? All of the MRT calls besides the init act only on
the initialized table.
Also, you're not checking that optlen is the proper size, and I wonder which kernel this is
based on, because in net-next ip_mroute_setsockopt() takes rtnl at the beginning
and releases it at the end, with the exception of MRT_DONE, which needs to release it
earlier. The code below would therefore deadlock trying to take rtnl again in MRT_FLUSH.
This patch should be targeted at net-next; please indicate that in your subject as well,
e.g. [PATCH net-next].
Thanks,
Nik
> diff --git a/include/uapi/linux/mroute.h b/include/uapi/linux/mroute.h
> index 5d37a9ccce63..2d475edc3ec3 100644
> --- a/include/uapi/linux/mroute.h
> +++ b/include/uapi/linux/mroute.h
> @@ -28,12 +28,17 @@
> #define MRT_TABLE (MRT_BASE+9) /* Specify mroute table ID */
> #define MRT_ADD_MFC_PROXY (MRT_BASE+10) /* Add a (*,*|G) mfc entry */
> #define MRT_DEL_MFC_PROXY (MRT_BASE+11) /* Del a (*,*|G) mfc entry */
> -#define MRT_MAX (MRT_BASE+11)
> +#define MRT_FLUSH (MRT_BASE+12) /* Flush all multicast entries and vifs */
> +#define MRT_MAX (MRT_BASE+12)
>
> #define SIOCGETVIFCNT SIOCPROTOPRIVATE /* IP protocol privates */
> #define SIOCGETSGCNT (SIOCPROTOPRIVATE+1)
> #define SIOCGETRPF (SIOCPROTOPRIVATE+2)
>
> +/* Flags used for MRT_FLUSH */
> +#define MRT_FLUSH_ENTRIES 1 /* For flushing all multicast entries */
> +#define MRT_FLUSH_VIFS 2 /* For flushing all multicast vifs */
> +
> #define MAXVIFS 32
> typedef unsigned long vifbitmap_t; /* User mode code depends on this lot */
> typedef unsigned short vifi_t;
> diff --git a/include/uapi/linux/mroute6.h b/include/uapi/linux/mroute6.h
> index 9999cc006390..b04094d997c8 100644
> --- a/include/uapi/linux/mroute6.h
> +++ b/include/uapi/linux/mroute6.h
> @@ -31,12 +31,17 @@
> #define MRT6_TABLE (MRT6_BASE+9) /* Specify mroute table ID */
> #define MRT6_ADD_MFC_PROXY (MRT6_BASE+10) /* Add a (*,*|G) mfc entry */
> #define MRT6_DEL_MFC_PROXY (MRT6_BASE+11) /* Del a (*,*|G) mfc entry */
> -#define MRT6_MAX (MRT6_BASE+11)
> +#define MRT6_FLUSH (MRT6_BASE+12) /* Flush all multicast entries and vifs */
> +#define MRT6_MAX (MRT6_BASE+12)
>
> #define SIOCGETMIFCNT_IN6 SIOCPROTOPRIVATE /* IP protocol privates */
> #define SIOCGETSGCNT_IN6 (SIOCPROTOPRIVATE+1)
> #define SIOCGETRPF (SIOCPROTOPRIVATE+2)
>
> +/* Flags used for MRT6_FLUSH*/
> +#define MRT6_FLUSH_ENTRIES 1 /* For flushing all multicast entries */
> +#define MRT6_FLUSH_VIFS 2 /* For flushing all multicast vifs */
> +
> #define MAXMIFS 32
> typedef unsigned long mifbitmap_t; /* User mode code depends on this lot */
> typedef unsigned short mifi_t;
> diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
> index ddbf8c9a1abb..2eb569138569 100644
> --- a/net/ipv4/ipmr.c
> +++ b/net/ipv4/ipmr.c
> @@ -416,7 +416,7 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
> static void ipmr_free_table(struct mr_table *mrt)
> {
> del_timer_sync(&mrt->ipmr_expire_timer);
> - mroute_clean_tables(mrt, true);
> + mroute_clean_tables(mrt, true, MRT_FLUSH_VIFS | MRT_FLUSH_ENTRIES);
> rhltable_destroy(&mrt->mfc_hash);
> kfree(mrt);
> }
> @@ -1299,44 +1299,48 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
> }
>
> /* Close the multicast socket, and clear the vif tables etc */
> -static void mroute_clean_tables(struct mr_table *mrt, bool all)
> +static void mroute_clean_tables(struct mr_table *mrt, bool all, int flags)
> {
> struct net *net = read_pnet(&mrt->net);
> - struct mr_mfc *c, *tmp;
> struct mfc_cache *cache;
> + struct mr_mfc *c, *tmp;
> LIST_HEAD(list);
> int i;
>
> /* Shut down all active vif entries */
> - for (i = 0; i < mrt->maxvif; i++) {
> - if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
> - continue;
> - vif_delete(mrt, i, 0, &list);
> + if (flags & MRT_FLUSH_VIFS) {
> + for (i = 0; i < mrt->maxvif; i++) {
> + if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
> + continue;
> + vif_delete(mrt, i, 0, &list);
> + }
> + unregister_netdevice_many(&list);
> }
> - unregister_netdevice_many(&list);
>
> /* Wipe the cache */
> - list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
> - if (!all && (c->mfc_flags & MFC_STATIC))
> - continue;
> - rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
> - list_del_rcu(&c->list);
> - cache = (struct mfc_cache *)c;
> - call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, cache,
> - mrt->id);
> - mroute_netlink_event(mrt, cache, RTM_DELROUTE);
> - mr_cache_put(c);
> - }
> -
> - if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
> - spin_lock_bh(&mfc_unres_lock);
> - list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
> - list_del(&c->list);
> + if (flags & MRT_FLUSH_ENTRIES) {
> + list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
> + if (!all && (c->mfc_flags & MFC_STATIC))
> + continue;
> + rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
> + list_del_rcu(&c->list);
> cache = (struct mfc_cache *)c;
> + call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, cache,
> + mrt->id);
> mroute_netlink_event(mrt, cache, RTM_DELROUTE);
> - ipmr_destroy_unres(mrt, cache);
> + mr_cache_put(c);
> + }
> +
> + if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
> + spin_lock_bh(&mfc_unres_lock);
> + list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
> + list_del(&c->list);
> + cache = (struct mfc_cache *)c;
> + mroute_netlink_event(mrt, cache, RTM_DELROUTE);
> + ipmr_destroy_unres(mrt, cache);
> + }
> + spin_unlock_bh(&mfc_unres_lock);
> }
> - spin_unlock_bh(&mfc_unres_lock);
> }
> }
>
> @@ -1357,7 +1361,7 @@ static void mrtsock_destruct(struct sock *sk)
> NETCONFA_IFINDEX_ALL,
> net->ipv4.devconf_all);
> RCU_INIT_POINTER(mrt->mroute_sk, NULL);
> - mroute_clean_tables(mrt, false);
> + mroute_clean_tables(mrt, false, MRT_FLUSH_VIFS | MRT_FLUSH_ENTRIES);
> }
> }
> rtnl_unlock();
> @@ -1482,6 +1486,17 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
> sk == rtnl_dereference(mrt->mroute_sk),
> parent);
> break;
> + case MRT_FLUSH:
> + if (get_user(val, (int __user *)optval)) {
> + ret = -EFAULT;
> + break;
> + }
> + rtnl_lock();
> + ipmr_for_each_table(mrt, net) {
> + mroute_clean_tables(mrt, true, val);
> + }
> + rtnl_unlock();
> + break;
> /* Control PIM assert. */
> case MRT_ASSERT:
> if (optlen != sizeof(val)) {
> diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
> index 30337b38274b..473c83d197fe 100644
> --- a/net/ipv6/ip6mr.c
> +++ b/net/ipv6/ip6mr.c
> @@ -393,7 +393,7 @@ static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
> static void ip6mr_free_table(struct mr_table *mrt)
> {
> del_timer_sync(&mrt->ipmr_expire_timer);
> - mroute_clean_tables(mrt, true);
> + mroute_clean_tables(mrt, true, MRT6_FLUSH_VIFS | MRT6_FLUSH_ENTRIES);
> rhltable_destroy(&mrt->mfc_hash);
> kfree(mrt);
> }
> @@ -1496,43 +1496,47 @@ static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
> * Close the multicast socket, and clear the vif tables etc
> */
>
> -static void mroute_clean_tables(struct mr_table *mrt, bool all)
> +static void mroute_clean_tables(struct mr_table *mrt, bool all, int flags)
> {
> struct mr_mfc *c, *tmp;
> LIST_HEAD(list);
> int i;
>
> /* Shut down all active vif entries */
> - for (i = 0; i < mrt->maxvif; i++) {
> - if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
> - continue;
> - mif6_delete(mrt, i, 0, &list);
> + if (flags & MRT6_FLUSH_VIFS) {
> + for (i = 0; i < mrt->maxvif; i++) {
> + if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
> + continue;
> + mif6_delete(mrt, i, 0, &list);
> + }
> + unregister_netdevice_many(&list);
> }
> - unregister_netdevice_many(&list);
>
> /* Wipe the cache */
> - list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
> - if (!all && (c->mfc_flags & MFC_STATIC))
> - continue;
> - rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
> - list_del_rcu(&c->list);
> - mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
> - mr_cache_put(c);
> - }
> + if (flags & MRT6_FLUSH_ENTRIES) {
> + list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
> + if (!all && (c->mfc_flags & MFC_STATIC))
> + continue;
> + rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
> + list_del_rcu(&c->list);
> + mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
> + mr_cache_put(c);
> + }
>
> - if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
> - spin_lock_bh(&mfc_unres_lock);
> - list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
> - list_del(&c->list);
> - call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
> - FIB_EVENT_ENTRY_DEL,
> - (struct mfc6_cache *)c,
> - mrt->id);
> - mr6_netlink_event(mrt, (struct mfc6_cache *)c,
> - RTM_DELROUTE);
> - ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
> + if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
> + spin_lock_bh(&mfc_unres_lock);
> + list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
> + list_del(&c->list);
> + call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
> + FIB_EVENT_ENTRY_DEL,
> + (struct mfc6_cache *)c,
> + mrt->id);
> + mr6_netlink_event(mrt, (struct mfc6_cache *)c,
> + RTM_DELROUTE);
> + ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
> + }
> + spin_unlock_bh(&mfc_unres_lock);
> }
> - spin_unlock_bh(&mfc_unres_lock);
> }
> }
>
> @@ -1588,7 +1592,7 @@ int ip6mr_sk_done(struct sock *sk)
> NETCONFA_IFINDEX_ALL,
> net->ipv6.devconf_all);
>
> - mroute_clean_tables(mrt, false);
> + mroute_clean_tables(mrt, false, MRT6_FLUSH_VIFS | MRT6_FLUSH_ENTRIES);
> err = 0;
> break;
> }
> @@ -1703,6 +1707,19 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
> parent);
> rtnl_unlock();
> return ret;
> + case MRT6_DEL_MFC_ALL:
> + {
> + int flags;
> +
> + if (get_user(flags, (int __user *)optval))
> + return -EFAULT;
> + rtnl_lock();
> + ip6mr_for_each_table(mrt, net) {
> + mroute_clean_tables(mrt, true, flags);
> + }
> + rtnl_unlock();
> + return 0;
> + }
>
> /*
> * Control PIM assert (to activate pim will activate assert)
>
Powered by blists - more mailing lists