lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1319208237.32161.14.camel@edumazet-HP-Compaq-6005-Pro-SFF-PC>
Date:	Fri, 21 Oct 2011 16:43:57 +0200
From:	Eric Dumazet <eric.dumazet@...il.com>
To:	Jiri Pirko <jpirko@...hat.com>
Cc:	netdev@...r.kernel.org, davem@...emloft.net,
	bhutchings@...arflare.com, shemminger@...tta.com, fubar@...ibm.com,
	andy@...yhouse.net, tgraf@...radead.org, ebiederm@...ssion.com,
	mirqus@...il.com, kaber@...sh.net, greearb@...delatech.com,
	jesse@...ira.com, fbl@...hat.com, benjamin.poirier@...il.com,
	jzupka@...hat.com
Subject: Re: [patch net-next V2] net: introduce ethernet teaming device

Le vendredi 21 octobre 2011 à 14:39 +0200, Jiri Pirko a écrit :
> This patch introduces new network device called team. It supposes to be
> very fast, simple, userspace-driven alternative to existing bonding
> driver.
> 
> Userspace library called libteam with couple of demo apps is available
> here:
> https://github.com/jpirko/libteam
> Note it's still in its dipers atm.
> 
> team<->libteam use generic netlink for communication. That and rtnl
> suppose to be the only way to configure team device, no sysfs etc.
> 
> Python binding basis for libteam was recently introduced (some need
> still need to be done on it though). Daemon providing arpmon/miimon
> active-backup functionality will be introduced shortly.
> All what's necessary is already implemented in kernel team driver.
> 
> Signed-off-by: Jiri Pirko <jpirko@...hat.com>
> 
> v1->v2:
> 	- modes are made as modules. Makes team more modular and
> 	  extendable.
> 	- several commenters' nitpicks found on v1 were fixed
> 	- several other bugs were fixed.
> 	- note I ignored Eric's comment about roundrobin port selector
> 	  as Eric's way may be easily implemented as another mode (mode
> 	  "random") in future.
> ---

Very nice work !

> +
> +
> +/*
> + * We can benefit from the fact that it's ensured no port is present
> + * at the time of mode change.
> + */
> +static int __team_change_mode(struct team *team,
> +			      const struct team_mode *new_mode)
> +{
> +	/* Check if mode was previously set and do cleanup if so */
> +	if (team->mode_kind) {
> +		void (*exit_op)(struct team *team) = team->mode_ops.exit;
> +
> +		/* Clear ops area so no callback is called any longer */
> +		memset(&team->mode_ops, 0, sizeof(struct team_mode_ops));

	Hmm, memset() has no atomicity guarantee about 'longs' or 'pointers'.

	You must make sure mode_ops.receive (and other pointers) is set not
byte per byte, but in one go.

> +
> +		synchronize_rcu();
> +
> +		if (exit_op)
> +			exit_op(team);
> +		team_mode_put(team->mode_kind);
> +		team->mode_kind = NULL;
> +		/* zero private data area */
> +		memset(&team->mode_priv, 0,
> +		       sizeof(struct team) - offsetof(struct team, mode_priv));
> +	}
> +
> +	if (!new_mode)
> +		return 0;
> +
> +	if (new_mode->ops->init) {
> +		int err;
> +
> +		err = new_mode->ops->init(team);
> +		if (err)
> +			return err;
> +	}
> +
> +	team->mode_kind = new_mode->kind;
> +	memcpy(&team->mode_ops, new_mode->ops, sizeof(struct team_mode_ops));
> +
> +	return 0;
> +}
> +

> +
> +/************************
> + * Rx path frame handler
> + ************************/
> +
> +/* note: already called with rcu_read_lock */
> +static rx_handler_result_t team_handle_frame(struct sk_buff **pskb)
> +{
> +	struct sk_buff *skb = *pskb;
> +	struct team_port *port;
> +	struct team *team;
> +	rx_handler_result_t res = RX_HANDLER_ANOTHER;
> +
> +	skb = skb_share_check(skb, GFP_ATOMIC);
> +	if (!skb)
> +		return RX_HANDLER_CONSUMED;
> +
> +	*pskb = skb;
> +
> +	port = team_port_get_rcu(skb->dev);
> +	team = port->team;
> +
> +	if (team->mode_ops.receive)

Hmm, you need ACCESS_ONCE() here or rcu_dereference()

See commit 4d97480b1806e883eb (bonding: use local function pointer of
bond->recv_probe in bond_handle_frame) for reference

> +		res = team->mode_ops.receive(team, port, skb);
> +
> +	if (res == RX_HANDLER_ANOTHER) {
> +		struct team_pcpu_stats *pcpu_stats;
> +
> +		pcpu_stats = this_cpu_ptr(team->pcpu_stats);
> +		u64_stats_update_begin(&pcpu_stats->syncp);
> +		pcpu_stats->rx_packets++;
> +		pcpu_stats->rx_bytes += skb->len;
> +		if (skb->pkt_type == PACKET_MULTICAST)
> +			pcpu_stats->rx_multicast++;
> +		u64_stats_update_end(&pcpu_stats->syncp);
> +
> +		skb->dev = team->dev;
> +	} else {
> +		this_cpu_inc(team->pcpu_stats->rx_dropped);
> +	}
> +
> +	return res;
> +}
> +


> +
> +static int team_port_enter(struct team *team, struct team_port *port)
> +{
> +	int err = 0;
> +
> +	dev_hold(team->dev);
> +	port->dev->priv_flags |= IFF_TEAM_PORT;
> +	if (team->mode_ops.port_enter) {
> +		err = team->mode_ops.port_enter(team, port);
> +		if (err)
> +			netdev_err(team->dev, "Device %s failed to enter team mode\n",
> +				   port->dev->name);

		Not sure if you need to unset IFF_TEAM_PORT;

> +	}
> +	return err;
> +}
> +


> diff --git a/include/linux/if_team.h b/include/linux/if_team.h
> new file mode 100644
> index 0000000..21581a7
> --- /dev/null
> +++ b/include/linux/if_team.h
> @@ -0,0 +1,233 @@
> +/*
> + * include/linux/if_team.h - Network team device driver header
> + * Copyright (c) 2011 Jiri Pirko <jpirko@...hat.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + */
> +
> +#ifndef _LINUX_IF_TEAM_H_
> +#define _LINUX_IF_TEAM_H_
> +
> +#ifdef __KERNEL__
> +
> +struct team_pcpu_stats {
> +	u64			rx_packets;
> +	u64			rx_bytes;
> +	u64			rx_multicast;
> +	u64			tx_packets;
> +	u64			tx_bytes;
> +	struct u64_stats_sync	syncp;
> +	u32			rx_dropped;
> +	u32			tx_dropped;

	"unsigned long" for these two fields ?

> +};
> +
> +struct team;
> +
> +struct team_port {
> +	struct net_device *dev;
> +	struct hlist_node hlist; /* node in hash list */
> +	struct list_head list; /* node in ordinary list */
> +	struct team *team;
> +	int index;
> +
> +	/*
> +	 * A place for storing original values of the device before it
> +	 * become a port.
> +	 */
> +	struct {
> +		unsigned char dev_addr[MAX_ADDR_LEN];
> +		unsigned int mtu;
> +	} orig;
> +
> +	bool linkup;
> +	u32 speed;
> +	u8 duplex;
> +
> +	struct rcu_head rcu;
> +};
> +
> +struct team_mode_ops {
> +	int (*init)(struct team *team);
> +	void (*exit)(struct team *team);
> +	rx_handler_result_t (*receive)(struct team *team,
> +				       struct team_port *port,
> +				       struct sk_buff *skb);
> +	bool (*transmit)(struct team *team, struct sk_buff *skb);
> +	int (*port_enter)(struct team *team, struct team_port *port);
> +	void (*port_leave)(struct team *team, struct team_port *port);
> +	void (*port_change_mac)(struct team *team, struct team_port *port);
> +};
> +
> +enum team_option_type {
> +	TEAM_OPTION_TYPE_U32,
> +	TEAM_OPTION_TYPE_STRING,
> +};
> +
> +struct team_option {
> +	struct list_head list;
> +	const char *name;
> +	enum team_option_type type;
> +	int (*getter)(struct team *team, void *arg);
> +	int (*setter)(struct team *team, void *arg);
> +};
> +
> +struct team_mode {
> +	struct list_head list;
> +	const char *kind;
> +	struct module *owner;
> +	size_t priv_size;
> +	const struct team_mode_ops *ops;
> +};
> +
> +#define TEAM_MODE_PRIV_LONGS 4
> +#define TEAM_MODE_PRIV_SIZE (sizeof(long) * TEAM_MODE_PRIV_LONGS)
> +
> +struct team {
> +	struct net_device *dev; /* associated netdevice */


> +	struct team_pcpu_stats __percpu *pcpu_stats;

	I believe you can use net_device->anonymous_union, the one with
ml_priv :
	struct pcpu_lstats __percpu *lstats; /* loopback stats */
	struct pcpu_tstats __percpu *tstats; /* tunnel stats */
	struct pcpu_dstats __percpu *dstats; /* dummy stats */
and add here
	struct team_pcpu_stats __percpu *team_stats;

> +
> +	spinlock_t lock; /* used for overall locking, e.g. port lists write */
> +
> +	/*
> +	 * port lists with port count
> +	 */
> +	int port_count;
> +	struct hlist_head *port_hlist;

	I am not sure why you want an external hash table, with 16 pointers...
This could be embedded here to remove one dereference ?

> +	struct list_head port_list;
> +
> +	struct list_head option_list;
> +
> +	const char *mode_kind;
> +	struct team_mode_ops mode_ops;
> +	long mode_priv[TEAM_MODE_PRIV_LONGS];
> +};
> +


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ