Date: Sat, 22 Sep 2012 23:18:24 +0200
From: richard -rw- weinberger <richard.weinberger@...il.com>
To: John Stultz <john.stultz@...aro.org>
Cc: LKML <linux-kernel@...r.kernel.org>, JP Abgrall <jpa@...gle.com>,
	netdev@...r.kernel.org, Ashish Sharma <ashishsharma@...gle.com>,
	Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@...el.com>
Subject: Re: [PATCH 2/7][RFC] netfilter: add xt_qtaguid matching module

Just a few comments, please see below.
In general I'd send this module also to netfilter-devel@... and get rid of
most debugging stuff.

On Sat, Sep 22, 2012 at 4:10 AM, John Stultz <john.stultz@...aro.org> wrote:
> Put procfs dirs in /proc/net/xt_qtaguid/
>   ctrl
>   stats
>   iface_stat/<iface>/...
> The uid stats are obtainable in ./stats.

Do we really want new files in /proc?

> +static const char *module_procdirname = "xt_qtaguid";

Why not char[]?

> +static struct proc_dir_entry *xt_qtaguid_procdir;
> +
> +static unsigned int proc_iface_perms = S_IRUGO;
> +module_param_named(iface_perms, proc_iface_perms, uint, S_IRUGO | S_IWUSR);
> +
> +static struct proc_dir_entry *xt_qtaguid_stats_file;
> +static unsigned int proc_stats_perms = S_IRUGO;
> +module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR);
> +
> +static struct proc_dir_entry *xt_qtaguid_ctrl_file;
> +static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUSR;
> +module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR);
> +
> +/* 0 means, don't limit anybody */
> +static gid_t proc_stats_readall_gid;
> +static gid_t proc_ctrl_write_gid;
> +module_param_named(stats_readall_gid, proc_stats_readall_gid, uint,
> +		   S_IRUGO | S_IWUSR);
> +module_param_named(ctrl_write_gid, proc_ctrl_write_gid, uint,
> +		   S_IRUGO | S_IWUSR);
> +
> +/*
> + * Limit the number of active tags (via socket tags) for a given UID.
> + * Multiple processes could share the UID.
> + */
> +static int max_sock_tags = DEFAULT_MAX_SOCK_TAGS;
> +module_param(max_sock_tags, int, S_IRUGO | S_IWUSR);
> +
> +/*
> + * After the kernel has initiallized this module, it is still possible
> + * to make it passive.
> + * Setting passive to Y:
> + * - the iface stats handling will not act on notifications.
> + * - iptables matches will never match.
> + * - ctrl commands silently succeed.
> + * - stats are always empty.
> + * This is mostly usefull when a bug is suspected.
> + */
> +static bool module_passive;
> +module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR);
> +
> +/*
> + * Control how qtaguid data is tracked per proc/uid.
> + * Setting tag_tracking_passive to Y:
> + * - don't create proc specific structs to track tags
> + * - don't check that active tag stats exceed some limits.
> + * - don't clean up socket tags on process exits.
> + * This is mostly usefull when a bug is suspected.
> + */
> +static bool qtu_proc_handling_passive;
> +module_param_named(tag_tracking_passive, qtu_proc_handling_passive, bool,
> +		   S_IRUGO | S_IWUSR);
> +
> +#define QTU_DEV_NAME "xt_qtaguid"

We have this string already in module_procdirname.
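Something along these lines would cover both points at once. Just a sketch,
and it assumes QTU_DEV_NAME is only ever used where a plain const char *
string is fine:

static const char module_procdirname[] = "xt_qtaguid";	/* array, not pointer */

/* Reuse the existing string instead of a second "xt_qtaguid" literal. */
#define QTU_DEV_NAME module_procdirname

The array form also drops the needless pointer object.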
> +static LIST_HEAD(iface_stat_list); > +static DEFINE_SPINLOCK(iface_stat_list_lock); > + > +static struct rb_root sock_tag_tree = RB_ROOT; > +static DEFINE_SPINLOCK(sock_tag_list_lock); > + > +static struct rb_root tag_counter_set_tree = RB_ROOT; > +static DEFINE_SPINLOCK(tag_counter_set_list_lock); > + > +static struct rb_root uid_tag_data_tree = RB_ROOT; > +static DEFINE_SPINLOCK(uid_tag_data_tree_lock); > + > +static struct rb_root proc_qtu_data_tree = RB_ROOT; > +/* No proc_qtu_data_tree_lock; use uid_tag_data_tree_lock */ > + > +static struct qtaguid_event_counts qtu_events; > +/*----------------------------------------------*/ > +static bool can_manipulate_uids(void) > +{ > + /* root pwnd */ > + return unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_gid) > + || in_egroup_p(proc_ctrl_write_gid); > +} > + > +static bool can_impersonate_uid(uid_t uid) > +{ > + return uid == current_fsuid() || can_manipulate_uids(); > +} > + > +static bool can_read_other_uid_stats(uid_t uid) > +{ > + /* root pwnd */ > + return unlikely(!current_fsuid()) || uid == current_fsuid() > + || unlikely(!proc_stats_readall_gid) > + || in_egroup_p(proc_stats_readall_gid); > +} > + > +static inline void dc_add_byte_packets(struct data_counters *counters, int set, > + enum ifs_tx_rx direction, > + enum ifs_proto ifs_proto, > + int bytes, > + int packets) > +{ > + counters->bpc[set][direction][ifs_proto].bytes += bytes; > + counters->bpc[set][direction][ifs_proto].packets += packets; > +} > + > +static inline uint64_t dc_sum_bytes(struct data_counters *counters, > + int set, > + enum ifs_tx_rx direction) > +{ > + return counters->bpc[set][direction][IFS_TCP].bytes > + + counters->bpc[set][direction][IFS_UDP].bytes > + + counters->bpc[set][direction][IFS_PROTO_OTHER].bytes; > +} > + > +static inline uint64_t dc_sum_packets(struct data_counters *counters, > + int set, > + enum ifs_tx_rx direction) > +{ > + return counters->bpc[set][direction][IFS_TCP].packets > + + counters->bpc[set][direction][IFS_UDP].packets > + + counters->bpc[set][direction][IFS_PROTO_OTHER].packets; > +} > + > +static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag) > +{ > + struct rb_node *node = root->rb_node; > + > + while (node) { > + struct tag_node *data = rb_entry(node, struct tag_node, node); > + int result; > + RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): " > + " node=%p data=%p\n", tag, node, data); > + result = tag_compare(tag, data->tag); > + RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): " > + " data.tag=0x%llx (uid=%u) res=%d\n", > + tag, data->tag, get_uid_from_tag(data->tag), result); > + if (result < 0) > + node = node->rb_left; > + else if (result > 0) > + node = node->rb_right; > + else > + return data; > + } > + return NULL; > +} > + > +static void tag_node_tree_insert(struct tag_node *data, struct rb_root *root) > +{ > + struct rb_node **new = &(root->rb_node), *parent = NULL; > + > + /* Figure out where to put new node */ > + while (*new) { > + struct tag_node *this = rb_entry(*new, struct tag_node, > + node); > + int result = tag_compare(data->tag, this->tag); > + RB_DEBUG("qtaguid: %s(): tag=0x%llx" > + " (uid=%u)\n", __func__, > + this->tag, > + get_uid_from_tag(this->tag)); > + parent = *new; > + if (result < 0) > + new = &((*new)->rb_left); > + else if (result > 0) > + new = &((*new)->rb_right); > + else > + BUG(); WARN_ONCE(), please. Otherwise an attacker may trigger the BUG() remotely in case of an implementation error... > + } > + > + /* Add new node and rebalance tree. 
*/ > + rb_link_node(&data->node, parent, new); > + rb_insert_color(&data->node, root); > +} > + > +static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root) > +{ > + tag_node_tree_insert(&data->tn, root); > +} > + > +static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag) > +{ > + struct tag_node *node = tag_node_tree_search(root, tag); > + if (!node) > + return NULL; > + return rb_entry(&node->node, struct tag_stat, tn.node); > +} > + > +static void tag_counter_set_tree_insert(struct tag_counter_set *data, > + struct rb_root *root) > +{ > + tag_node_tree_insert(&data->tn, root); > +} > + > +static struct tag_counter_set *tag_counter_set_tree_search(struct rb_root *root, > + tag_t tag) > +{ > + struct tag_node *node = tag_node_tree_search(root, tag); > + if (!node) > + return NULL; > + return rb_entry(&node->node, struct tag_counter_set, tn.node); > + > +} > + > +static void tag_ref_tree_insert(struct tag_ref *data, struct rb_root *root) > +{ > + tag_node_tree_insert(&data->tn, root); > +} > + > +static struct tag_ref *tag_ref_tree_search(struct rb_root *root, tag_t tag) > +{ > + struct tag_node *node = tag_node_tree_search(root, tag); > + if (!node) > + return NULL; > + return rb_entry(&node->node, struct tag_ref, tn.node); > +} > + > +static struct sock_tag *sock_tag_tree_search(struct rb_root *root, > + const struct sock *sk) > +{ > + struct rb_node *node = root->rb_node; > + > + while (node) { > + struct sock_tag *data = rb_entry(node, struct sock_tag, > + sock_node); > + if (sk < data->sk) > + node = node->rb_left; > + else if (sk > data->sk) > + node = node->rb_right; > + else > + return data; > + } > + return NULL; > +} > + > +static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root) > +{ > + struct rb_node **new = &(root->rb_node), *parent = NULL; > + > + /* Figure out where to put new node */ > + while (*new) { > + struct sock_tag *this = rb_entry(*new, struct sock_tag, > + sock_node); > + parent = *new; > + if (data->sk < this->sk) > + new = &((*new)->rb_left); > + else if (data->sk > this->sk) > + new = &((*new)->rb_right); > + else > + BUG(); Same here. > + } > + > + /* Add new node and rebalance tree. 
*/ > + rb_link_node(&data->sock_node, parent, new); > + rb_insert_color(&data->sock_node, root); > +} > + > +static void sock_tag_tree_erase(struct rb_root *st_to_free_tree) > +{ > + struct rb_node *node; > + struct sock_tag *st_entry; > + > + node = rb_first(st_to_free_tree); > + while (node) { > + st_entry = rb_entry(node, struct sock_tag, sock_node); > + node = rb_next(node); > + CT_DEBUG("qtaguid: %s(): " > + "erase st: sk=%p tag=0x%llx (uid=%u)\n", __func__, > + st_entry->sk, > + st_entry->tag, > + get_uid_from_tag(st_entry->tag)); > + rb_erase(&st_entry->sock_node, st_to_free_tree); > + sockfd_put(st_entry->socket); > + kfree(st_entry); > + } > +} > + > +static struct proc_qtu_data *proc_qtu_data_tree_search(struct rb_root *root, > + const pid_t pid) > +{ > + struct rb_node *node = root->rb_node; > + > + while (node) { > + struct proc_qtu_data *data = rb_entry(node, > + struct proc_qtu_data, > + node); > + if (pid < data->pid) > + node = node->rb_left; > + else if (pid > data->pid) > + node = node->rb_right; > + else > + return data; > + } > + return NULL; > +} > + > +static void proc_qtu_data_tree_insert(struct proc_qtu_data *data, > + struct rb_root *root) > +{ > + struct rb_node **new = &(root->rb_node), *parent = NULL; > + > + /* Figure out where to put new node */ > + while (*new) { > + struct proc_qtu_data *this = rb_entry(*new, > + struct proc_qtu_data, > + node); > + parent = *new; > + if (data->pid < this->pid) > + new = &((*new)->rb_left); > + else if (data->pid > this->pid) > + new = &((*new)->rb_right); > + else > + BUG(); Same here. > + } > + > + /* Add new node and rebalance tree. */ > + rb_link_node(&data->node, parent, new); > + rb_insert_color(&data->node, root); > +} > + > +static void uid_tag_data_tree_insert(struct uid_tag_data *data, > + struct rb_root *root) > +{ > + struct rb_node **new = &(root->rb_node), *parent = NULL; > + > + /* Figure out where to put new node */ > + while (*new) { > + struct uid_tag_data *this = rb_entry(*new, > + struct uid_tag_data, > + node); > + parent = *new; > + if (data->uid < this->uid) > + new = &((*new)->rb_left); > + else if (data->uid > this->uid) > + new = &((*new)->rb_right); > + else > + BUG(); Same here. > + } > + > + /* Add new node and rebalance tree. */ > + rb_link_node(&data->node, parent, new); > + rb_insert_color(&data->node, root); > +} > + > +static struct uid_tag_data *uid_tag_data_tree_search(struct rb_root *root, > + uid_t uid) > +{ > + struct rb_node *node = root->rb_node; > + > + while (node) { > + struct uid_tag_data *data = rb_entry(node, > + struct uid_tag_data, > + node); > + if (uid < data->uid) > + node = node->rb_left; > + else if (uid > data->uid) > + node = node->rb_right; > + else > + return data; > + } > + return NULL; > +} > + > +/* > + * Allocates a new uid_tag_data struct if needed. > + * Returns a pointer to the found or allocated uid_tag_data. > + * Returns a PTR_ERR on failures, and lock is not held. > + * If found is not NULL: > + * sets *found to true if not allocated. > + * sets *found to false if allocated. 
> + */ > +struct uid_tag_data *get_uid_data(uid_t uid, bool *found_res) > +{ > + struct uid_tag_data *utd_entry; > + > + /* Look for top level uid_tag_data for the UID */ > + utd_entry = uid_tag_data_tree_search(&uid_tag_data_tree, uid); > + DR_DEBUG("qtaguid: get_uid_data(%u) utd=%p\n", uid, utd_entry); > + > + if (found_res) > + *found_res = utd_entry; > + if (utd_entry) > + return utd_entry; > + > + utd_entry = kzalloc(sizeof(*utd_entry), GFP_ATOMIC); > + if (!utd_entry) { > + pr_err("qtaguid: get_uid_data(%u): " > + "tag data alloc failed\n", uid); > + return ERR_PTR(-ENOMEM); > + } > + > + utd_entry->uid = uid; > + utd_entry->tag_ref_tree = RB_ROOT; > + uid_tag_data_tree_insert(utd_entry, &uid_tag_data_tree); > + DR_DEBUG("qtaguid: get_uid_data(%u) new utd=%p\n", uid, utd_entry); > + return utd_entry; > +} > + > +/* Never returns NULL. Either PTR_ERR or a valid ptr. */ > +static struct tag_ref *new_tag_ref(tag_t new_tag, > + struct uid_tag_data *utd_entry) > +{ > + struct tag_ref *tr_entry; > + int res; > + > + if (utd_entry->num_active_tags + 1 > max_sock_tags) { > + pr_info("qtaguid: new_tag_ref(0x%llx): " > + "tag ref alloc quota exceeded. max=%d\n", > + new_tag, max_sock_tags); > + res = -EMFILE; > + goto err_res; > + > + } > + > + tr_entry = kzalloc(sizeof(*tr_entry), GFP_ATOMIC); > + if (!tr_entry) { > + pr_err("qtaguid: new_tag_ref(0x%llx): " > + "tag ref alloc failed\n", > + new_tag); > + res = -ENOMEM; > + goto err_res; > + } > + tr_entry->tn.tag = new_tag; > + /* tr_entry->num_sock_tags handled by caller */ > + utd_entry->num_active_tags++; > + tag_ref_tree_insert(tr_entry, &utd_entry->tag_ref_tree); > + DR_DEBUG("qtaguid: new_tag_ref(0x%llx): " > + " inserted new tag ref %p\n", > + new_tag, tr_entry); > + return tr_entry; > + > +err_res: > + return ERR_PTR(res); > +} > + > +static struct tag_ref *lookup_tag_ref(tag_t full_tag, > + struct uid_tag_data **utd_res) > +{ > + struct uid_tag_data *utd_entry; > + struct tag_ref *tr_entry; > + bool found_utd; > + uid_t uid = get_uid_from_tag(full_tag); > + > + DR_DEBUG("qtaguid: lookup_tag_ref(tag=0x%llx (uid=%u))\n", > + full_tag, uid); > + > + utd_entry = get_uid_data(uid, &found_utd); > + if (IS_ERR_OR_NULL(utd_entry)) { > + if (utd_res) > + *utd_res = utd_entry; > + return NULL; > + } > + > + tr_entry = tag_ref_tree_search(&utd_entry->tag_ref_tree, full_tag); > + if (utd_res) > + *utd_res = utd_entry; > + DR_DEBUG("qtaguid: lookup_tag_ref(0x%llx) utd_entry=%p tr_entry=%p\n", > + full_tag, utd_entry, tr_entry); > + return tr_entry; > +} > + > +/* Never returns NULL. Either PTR_ERR or a valid ptr. */ > +static struct tag_ref *get_tag_ref(tag_t full_tag, > + struct uid_tag_data **utd_res) > +{ > + struct uid_tag_data *utd_entry; > + struct tag_ref *tr_entry; > + > + DR_DEBUG("qtaguid: get_tag_ref(0x%llx)\n", > + full_tag); > + spin_lock_bh(&uid_tag_data_tree_lock); > + tr_entry = lookup_tag_ref(full_tag, &utd_entry); > + BUG_ON(IS_ERR_OR_NULL(utd_entry)); > + if (!tr_entry) > + tr_entry = new_tag_ref(full_tag, utd_entry); > + > + spin_unlock_bh(&uid_tag_data_tree_lock); > + if (utd_res) > + *utd_res = utd_entry; > + DR_DEBUG("qtaguid: get_tag_ref(0x%llx) utd=%p tr=%p\n", > + full_tag, utd_entry, tr_entry); > + return tr_entry; > +} > + > +/* Checks and maybe frees the UID Tag Data entry */ > +static void put_utd_entry(struct uid_tag_data *utd_entry) > +{ > + /* Are we done with the UID tag data entry? 
*/ > + if (RB_EMPTY_ROOT(&utd_entry->tag_ref_tree) && > + !utd_entry->num_pqd) { > + DR_DEBUG("qtaguid: %s(): " > + "erase utd_entry=%p uid=%u " > + "by pid=%u tgid=%u uid=%u\n", __func__, > + utd_entry, utd_entry->uid, > + current->pid, current->tgid, current_fsuid()); > + BUG_ON(utd_entry->num_active_tags); > + rb_erase(&utd_entry->node, &uid_tag_data_tree); > + kfree(utd_entry); > + } else { > + DR_DEBUG("qtaguid: %s(): " > + "utd_entry=%p still has %d tags %d proc_qtu_data\n", > + __func__, utd_entry, utd_entry->num_active_tags, > + utd_entry->num_pqd); > + BUG_ON(!(utd_entry->num_active_tags || > + utd_entry->num_pqd)); > + } > +} > + > +/* > + * If no sock_tags are using this tag_ref, > + * decrements refcount of utd_entry, removes tr_entry > + * from utd_entry->tag_ref_tree and frees. > + */ > +static void free_tag_ref_from_utd_entry(struct tag_ref *tr_entry, > + struct uid_tag_data *utd_entry) > +{ > + DR_DEBUG("qtaguid: %s(): %p tag=0x%llx (uid=%u)\n", __func__, > + tr_entry, tr_entry->tn.tag, > + get_uid_from_tag(tr_entry->tn.tag)); > + if (!tr_entry->num_sock_tags) { > + BUG_ON(!utd_entry->num_active_tags); > + utd_entry->num_active_tags--; > + rb_erase(&tr_entry->tn.node, &utd_entry->tag_ref_tree); > + DR_DEBUG("qtaguid: %s(): erased %p\n", __func__, tr_entry); > + kfree(tr_entry); > + } > +} > + > +static void put_tag_ref_tree(tag_t full_tag, struct uid_tag_data *utd_entry) > +{ > + struct rb_node *node; > + struct tag_ref *tr_entry; > + tag_t acct_tag; > + > + DR_DEBUG("qtaguid: %s(tag=0x%llx (uid=%u))\n", __func__, > + full_tag, get_uid_from_tag(full_tag)); > + acct_tag = get_atag_from_tag(full_tag); > + node = rb_first(&utd_entry->tag_ref_tree); > + while (node) { > + tr_entry = rb_entry(node, struct tag_ref, tn.node); > + node = rb_next(node); > + if (!acct_tag || tr_entry->tn.tag == full_tag) > + free_tag_ref_from_utd_entry(tr_entry, utd_entry); > + } > +} > + > +static int read_proc_u64(char *page, char **start, off_t off, > + int count, int *eof, void *data) > +{ > + int len; > + uint64_t value; > + char *p = page; > + uint64_t *iface_entry = data; > + > + if (!data) > + return 0; > + > + value = *iface_entry; > + p += sprintf(p, "%llu\n", value); > + len = (p - page) - off; > + *eof = (len <= count) ? 1 : 0; > + *start = page + off; > + return len; > +} > + > +static int read_proc_bool(char *page, char **start, off_t off, > + int count, int *eof, void *data) > +{ > + int len; > + bool value; > + char *p = page; > + bool *bool_entry = data; > + > + if (!data) > + return 0; > + > + value = *bool_entry; > + p += sprintf(p, "%u\n", value); > + len = (p - page) - off; > + *eof = (len <= count) ? 1 : 0; > + *start = page + off; > + return len; > +} > + > +static int get_active_counter_set(tag_t tag) > +{ > + int active_set = 0; > + struct tag_counter_set *tcs; > + > + MT_DEBUG("qtaguid: get_active_counter_set(tag=0x%llx)" > + " (uid=%u)\n", > + tag, get_uid_from_tag(tag)); > + /* For now we only handle UID tags for active sets */ > + tag = get_utag_from_tag(tag); > + spin_lock_bh(&tag_counter_set_list_lock); > + tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag); > + if (tcs) > + active_set = tcs->active_set; > + spin_unlock_bh(&tag_counter_set_list_lock); > + return active_set; > +} > + > +/* > + * Find the entry for tracking the specified interface. 
> + * Caller must hold iface_stat_list_lock > + */ > +static struct iface_stat *get_iface_entry(const char *ifname) > +{ > + struct iface_stat *iface_entry; > + > + /* Find the entry for tracking the specified tag within the interface */ > + if (ifname == NULL) { > + pr_info("qtaguid: iface_stat: get() NULL device name\n"); > + return NULL; > + } Can ifname really become NULL? > + /* Iterate over interfaces */ > + list_for_each_entry(iface_entry, &iface_stat_list, list) { > + if (!strcmp(ifname, iface_entry->ifname)) > + goto done; > + } > + iface_entry = NULL; > +done: > + return iface_entry; > +} > + > +static int iface_stat_all_proc_read(char *page, char **num_items_returned, > + off_t items_to_skip, int char_count, > + int *eof, void *data) > +{ > + char *outp = page; > + int item_index = 0; > + int len; > + struct iface_stat *iface_entry; > + struct rtnl_link_stats64 dev_stats, *stats; > + struct rtnl_link_stats64 no_dev_stats = {0}; > + > + if (unlikely(module_passive)) { > + *eof = 1; > + return 0; > + } > + > + CT_DEBUG("qtaguid:proc iface_stat_all " > + "page=%p *num_items_returned=%p off=%ld " > + "char_count=%d *eof=%d\n", page, *num_items_returned, > + items_to_skip, char_count, *eof); > + > + if (*eof) > + return 0; > + > + /* > + * This lock will prevent iface_stat_update() from changing active, > + * and in turn prevent an interface from unregistering itself. > + */ > + spin_lock_bh(&iface_stat_list_lock); > + list_for_each_entry(iface_entry, &iface_stat_list, list) { > + if (item_index++ < items_to_skip) > + continue; > + > + if (iface_entry->active) { > + stats = dev_get_stats(iface_entry->net_dev, > + &dev_stats); > + } else { > + stats = &no_dev_stats; > + } > + len = snprintf(outp, char_count, > + "%s %d " > + "%llu %llu %llu %llu " > + "%llu %llu %llu %llu\n", > + iface_entry->ifname, > + iface_entry->active, > + iface_entry->totals[IFS_RX].bytes, > + iface_entry->totals[IFS_RX].packets, > + iface_entry->totals[IFS_TX].bytes, > + iface_entry->totals[IFS_TX].packets, > + stats->rx_bytes, stats->rx_packets, > + stats->tx_bytes, stats->tx_packets); > + if (len >= char_count) { > + spin_unlock_bh(&iface_stat_list_lock); > + *outp = '\0'; > + return outp - page; > + } > + outp += len; > + char_count -= len; > + (*num_items_returned)++; > + } > + spin_unlock_bh(&iface_stat_list_lock); > + > + *eof = 1; > + return outp - page; > +} > + > +static void iface_create_proc_worker(struct work_struct *work) > +{ > + struct proc_dir_entry *proc_entry; > + struct iface_stat_work *isw = container_of(work, struct iface_stat_work, > + iface_work); > + struct iface_stat *new_iface = isw->iface_entry; > + > + /* iface_entries are not deleted, so safe to manipulate. 
*/ > + proc_entry = proc_mkdir(new_iface->ifname, iface_stat_procdir); > + if (IS_ERR_OR_NULL(proc_entry)) { > + pr_err("qtaguid: iface_stat: create_proc(): alloc failed.\n"); > + kfree(isw); > + return; > + } > + > + new_iface->proc_ptr = proc_entry; > + > + create_proc_read_entry("tx_bytes", proc_iface_perms, proc_entry, > + read_proc_u64, &new_iface->totals[IFS_TX].bytes); > + create_proc_read_entry("rx_bytes", proc_iface_perms, proc_entry, > + read_proc_u64, &new_iface->totals[IFS_RX].bytes); > + create_proc_read_entry("tx_packets", proc_iface_perms, proc_entry, > + read_proc_u64, &new_iface->totals[IFS_TX].packets); > + create_proc_read_entry("rx_packets", proc_iface_perms, proc_entry, > + read_proc_u64, &new_iface->totals[IFS_RX].packets); > + create_proc_read_entry("active", proc_iface_perms, proc_entry, > + read_proc_bool, &new_iface->active); > + > + IF_DEBUG("qtaguid: iface_stat: create_proc(): done " > + "entry=%p dev=%s\n", new_iface, new_iface->ifname); > + kfree(isw); > +} > + > +/* > + * Will set the entry's active state, and > + * update the net_dev accordingly also. > + */ > +static void _iface_stat_set_active(struct iface_stat *entry, > + struct net_device *net_dev, > + bool activate) > +{ > + if (activate) { > + entry->net_dev = net_dev; > + entry->active = true; > + IF_DEBUG("qtaguid: %s(%s): " > + "enable tracking. rfcnt=%d\n", __func__, > + entry->ifname, > + this_cpu_read(*net_dev->pcpu_refcnt)); > + } else { > + entry->active = false; > + entry->net_dev = NULL; > + IF_DEBUG("qtaguid: %s(%s): " > + "disable tracking. rfcnt=%d\n", __func__, > + entry->ifname, > + this_cpu_read(*net_dev->pcpu_refcnt)); > + > + } > +} > + > +/* Caller must hold iface_stat_list_lock */ > +static struct iface_stat *iface_alloc(struct net_device *net_dev) > +{ > + struct iface_stat *new_iface; > + struct iface_stat_work *isw; > + > + new_iface = kzalloc(sizeof(*new_iface), GFP_ATOMIC); > + if (new_iface == NULL) { > + pr_err("qtaguid: iface_stat: create(%s): " > + "iface_stat alloc failed\n", net_dev->name); > + return NULL; > + } > + new_iface->ifname = kstrdup(net_dev->name, GFP_ATOMIC); > + if (new_iface->ifname == NULL) { > + pr_err("qtaguid: iface_stat: create(%s): " > + "ifname alloc failed\n", net_dev->name); > + kfree(new_iface); > + return NULL; > + } > + spin_lock_init(&new_iface->tag_stat_list_lock); > + new_iface->tag_stat_tree = RB_ROOT; > + _iface_stat_set_active(new_iface, net_dev, true); > + > + /* > + * ipv6 notifier chains are atomic :( > + * No create_proc_read_entry() for you! 
> + */ > + isw = kmalloc(sizeof(*isw), GFP_ATOMIC); > + if (!isw) { > + pr_err("qtaguid: iface_stat: create(%s): " > + "work alloc failed\n", new_iface->ifname); > + _iface_stat_set_active(new_iface, net_dev, false); > + kfree(new_iface->ifname); > + kfree(new_iface); > + return NULL; > + } > + isw->iface_entry = new_iface; > + INIT_WORK(&isw->iface_work, iface_create_proc_worker); > + schedule_work(&isw->iface_work); > + list_add(&new_iface->list, &iface_stat_list); > + return new_iface; > +} > + > +static void iface_check_stats_reset_and_adjust(struct net_device *net_dev, > + struct iface_stat *iface) > +{ > + struct rtnl_link_stats64 dev_stats, *stats; > + bool stats_rewound; > + > + stats = dev_get_stats(net_dev, &dev_stats); > + /* No empty packets */ > + stats_rewound = > + (stats->rx_bytes < iface->last_known[IFS_RX].bytes) > + || (stats->tx_bytes < iface->last_known[IFS_TX].bytes); > + > + IF_DEBUG("qtaguid: %s(%s): iface=%p netdev=%p " > + "bytes rx/tx=%llu/%llu " > + "active=%d last_known=%d " > + "stats_rewound=%d\n", __func__, > + net_dev ? net_dev->name : "?", > + iface, net_dev, > + stats->rx_bytes, stats->tx_bytes, > + iface->active, iface->last_known_valid, stats_rewound); > + > + if (iface->active && iface->last_known_valid && stats_rewound) { > + pr_warn_once("qtaguid: iface_stat: %s(%s): " > + "iface reset its stats unexpectedly\n", __func__, > + net_dev->name); > + > + iface->totals[IFS_TX].bytes += iface->last_known[IFS_TX].bytes; > + iface->totals[IFS_TX].packets += > + iface->last_known[IFS_TX].packets; > + iface->totals[IFS_RX].bytes += iface->last_known[IFS_RX].bytes; > + iface->totals[IFS_RX].packets += > + iface->last_known[IFS_RX].packets; > + iface->last_known_valid = false; > + IF_DEBUG("qtaguid: %s(%s): iface=%p " > + "used last known bytes rx/tx=%llu/%llu\n", __func__, > + iface->ifname, iface, iface->last_known[IFS_RX].bytes, > + iface->last_known[IFS_TX].bytes); > + } > +} > + > +/* > + * Create a new entry for tracking the specified interface. > + * Do nothing if the entry already exists. > + * Called when an interface is configured with a valid IP address. > + */ > +static void iface_stat_create(struct net_device *net_dev, > + struct in_ifaddr *ifa) > +{ > + struct in_device *in_dev = NULL; > + const char *ifname; > + struct iface_stat *entry; > + __be32 ipaddr = 0; > + struct iface_stat *new_iface; > + > + IF_DEBUG("qtaguid: iface_stat: create(%s): ifa=%p netdev=%p\n", > + net_dev ? net_dev->name : "?", > + ifa, net_dev); > + if (!net_dev) { > + pr_err("qtaguid: iface_stat: create(): no net dev\n"); > + return; > + } > + > + ifname = net_dev->name; > + if (!ifa) { > + in_dev = in_dev_get(net_dev); > + if (!in_dev) { > + pr_err("qtaguid: iface_stat: create(%s): no inet dev\n", > + ifname); > + return; > + } > + IF_DEBUG("qtaguid: iface_stat: create(%s): in_dev=%p\n", > + ifname, in_dev); > + for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { > + IF_DEBUG("qtaguid: iface_stat: create(%s): " > + "ifa=%p ifa_label=%s\n", > + ifname, ifa, > + ifa->ifa_label ? 
ifa->ifa_label : "(null)"); > + if (ifa->ifa_label && !strcmp(ifname, ifa->ifa_label)) > + break; > + } > + } > + > + if (!ifa) { > + IF_DEBUG("qtaguid: iface_stat: create(%s): no matching IP\n", > + ifname); > + goto done_put; > + } > + ipaddr = ifa->ifa_local; > + > + spin_lock_bh(&iface_stat_list_lock); > + entry = get_iface_entry(ifname); > + if (entry != NULL) { > + bool activate = !ipv4_is_loopback(ipaddr); > + IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n", > + ifname, entry); > + iface_check_stats_reset_and_adjust(net_dev, entry); > + _iface_stat_set_active(entry, net_dev, activate); > + IF_DEBUG("qtaguid: %s(%s): " > + "tracking now %d on ip=%pI4\n", __func__, > + entry->ifname, activate, &ipaddr); > + goto done_unlock_put; > + } else if (ipv4_is_loopback(ipaddr)) { > + IF_DEBUG("qtaguid: iface_stat: create(%s): " > + "ignore loopback dev. ip=%pI4\n", ifname, &ipaddr); > + goto done_unlock_put; > + } > + > + new_iface = iface_alloc(net_dev); > + IF_DEBUG("qtaguid: iface_stat: create(%s): done " > + "entry=%p ip=%pI4\n", ifname, new_iface, &ipaddr); > +done_unlock_put: > + spin_unlock_bh(&iface_stat_list_lock); > +done_put: > + if (in_dev) > + in_dev_put(in_dev); > +} > + > +static void iface_stat_create_ipv6(struct net_device *net_dev, > + struct inet6_ifaddr *ifa) > +{ > + struct in_device *in_dev; > + const char *ifname; > + struct iface_stat *entry; > + struct iface_stat *new_iface; > + int addr_type; > + > + IF_DEBUG("qtaguid: iface_stat: create6(): ifa=%p netdev=%p->name=%s\n", > + ifa, net_dev, net_dev ? net_dev->name : ""); > + if (!net_dev) { > + pr_err("qtaguid: iface_stat: create6(): no net dev!\n"); > + return; > + } > + ifname = net_dev->name; > + > + in_dev = in_dev_get(net_dev); > + if (!in_dev) { > + pr_err("qtaguid: iface_stat: create6(%s): no inet dev\n", > + ifname); > + return; > + } > + > + IF_DEBUG("qtaguid: iface_stat: create6(%s): in_dev=%p\n", > + ifname, in_dev); > + > + if (!ifa) { > + IF_DEBUG("qtaguid: iface_stat: create6(%s): no matching IP\n", > + ifname); > + goto done_put; > + } > + addr_type = ipv6_addr_type(&ifa->addr); > + > + spin_lock_bh(&iface_stat_list_lock); > + entry = get_iface_entry(ifname); > + if (entry != NULL) { > + bool activate = !(addr_type & IPV6_ADDR_LOOPBACK); > + IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__, > + ifname, entry); > + iface_check_stats_reset_and_adjust(net_dev, entry); > + _iface_stat_set_active(entry, net_dev, activate); > + IF_DEBUG("qtaguid: %s(%s): " > + "tracking now %d on ip=%pI6c\n", __func__, > + entry->ifname, activate, &ifa->addr); > + goto done_unlock_put; > + } else if (addr_type & IPV6_ADDR_LOOPBACK) { > + IF_DEBUG("qtaguid: %s(%s): " > + "ignore loopback dev. 
ip=%pI6c\n", __func__, > + ifname, &ifa->addr); > + goto done_unlock_put; > + } > + > + new_iface = iface_alloc(net_dev); > + IF_DEBUG("qtaguid: iface_stat: create6(%s): done " > + "entry=%p ip=%pI6c\n", ifname, new_iface, &ifa->addr); > + > +done_unlock_put: > + spin_unlock_bh(&iface_stat_list_lock); > +done_put: > + in_dev_put(in_dev); > +} > + > +static struct sock_tag *get_sock_stat_nl(const struct sock *sk) > +{ > + MT_DEBUG("qtaguid: get_sock_stat_nl(sk=%p)\n", sk); > + return sock_tag_tree_search(&sock_tag_tree, sk); > +} > + > +static struct sock_tag *get_sock_stat(const struct sock *sk) > +{ > + struct sock_tag *sock_tag_entry; > + MT_DEBUG("qtaguid: get_sock_stat(sk=%p)\n", sk); > + if (!sk) > + return NULL; > + spin_lock_bh(&sock_tag_list_lock); > + sock_tag_entry = get_sock_stat_nl(sk); > + spin_unlock_bh(&sock_tag_list_lock); > + return sock_tag_entry; > +} > + > +static void > +data_counters_update(struct data_counters *dc, int set, > + enum ifs_tx_rx direction, int proto, int bytes) > +{ > + switch (proto) { > + case IPPROTO_TCP: > + dc_add_byte_packets(dc, set, direction, IFS_TCP, bytes, 1); > + break; > + case IPPROTO_UDP: > + dc_add_byte_packets(dc, set, direction, IFS_UDP, bytes, 1); > + break; > + case IPPROTO_IP: > + default: > + dc_add_byte_packets(dc, set, direction, IFS_PROTO_OTHER, bytes, > + 1); > + break; > + } > +} > + > +/* > + * Update stats for the specified interface. Do nothing if the entry > + * does not exist (when a device was never configured with an IP address). > + * Called when an device is being unregistered. > + */ > +static void iface_stat_update(struct net_device *net_dev, bool stash_only) > +{ > + struct rtnl_link_stats64 dev_stats, *stats; > + struct iface_stat *entry; > + > + stats = dev_get_stats(net_dev, &dev_stats); > + spin_lock_bh(&iface_stat_list_lock); > + entry = get_iface_entry(net_dev->name); > + if (entry == NULL) { > + IF_DEBUG("qtaguid: iface_stat: update(%s): not tracked\n", > + net_dev->name); > + spin_unlock_bh(&iface_stat_list_lock); > + return; > + } > + > + IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__, > + net_dev->name, entry); > + if (!entry->active) { > + IF_DEBUG("qtaguid: %s(%s): already disabled\n", __func__, > + net_dev->name); > + spin_unlock_bh(&iface_stat_list_lock); > + return; > + } > + > + if (stash_only) { > + entry->last_known[IFS_TX].bytes = stats->tx_bytes; > + entry->last_known[IFS_TX].packets = stats->tx_packets; > + entry->last_known[IFS_RX].bytes = stats->rx_bytes; > + entry->last_known[IFS_RX].packets = stats->rx_packets; > + entry->last_known_valid = true; > + IF_DEBUG("qtaguid: %s(%s): " > + "dev stats stashed rx/tx=%llu/%llu\n", __func__, > + net_dev->name, stats->rx_bytes, stats->tx_bytes); > + spin_unlock_bh(&iface_stat_list_lock); > + return; > + } > + entry->totals[IFS_TX].bytes += stats->tx_bytes; > + entry->totals[IFS_TX].packets += stats->tx_packets; > + entry->totals[IFS_RX].bytes += stats->rx_bytes; > + entry->totals[IFS_RX].packets += stats->rx_packets; > + /* We don't need the last_known[] anymore */ > + entry->last_known_valid = false; > + _iface_stat_set_active(entry, net_dev, false); > + IF_DEBUG("qtaguid: %s(%s): " > + "disable tracking. 
rx/tx=%llu/%llu\n", __func__, > + net_dev->name, stats->rx_bytes, stats->tx_bytes); > + spin_unlock_bh(&iface_stat_list_lock); > +} > + > +static void tag_stat_update(struct tag_stat *tag_entry, > + enum ifs_tx_rx direction, int proto, int bytes) > +{ > + int active_set; > + active_set = get_active_counter_set(tag_entry->tn.tag); > + MT_DEBUG("qtaguid: tag_stat_update(tag=0x%llx (uid=%u) set=%d " > + "dir=%d proto=%d bytes=%d)\n", > + tag_entry->tn.tag, get_uid_from_tag(tag_entry->tn.tag), > + active_set, direction, proto, bytes); > + data_counters_update(&tag_entry->counters, active_set, direction, > + proto, bytes); > + if (tag_entry->parent_counters) > + data_counters_update(tag_entry->parent_counters, active_set, > + direction, proto, bytes); > +} > + > +/* > + * Create a new entry for tracking the specified {acct_tag,uid_tag} within > + * the interface. > + * iface_entry->tag_stat_list_lock should be held. > + */ > +static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry, > + tag_t tag) > +{ > + struct tag_stat *new_tag_stat_entry = NULL; > + IF_DEBUG("qtaguid: iface_stat: %s(): ife=%p tag=0x%llx" > + " (uid=%u)\n", __func__, > + iface_entry, tag, get_uid_from_tag(tag)); > + new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC); > + if (!new_tag_stat_entry) { > + pr_err("qtaguid: iface_stat: tag stat alloc failed\n"); > + goto done; > + } > + new_tag_stat_entry->tn.tag = tag; > + tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree); > +done: > + return new_tag_stat_entry; > +} > + > +static void if_tag_stat_update(const char *ifname, uid_t uid, > + const struct sock *sk, enum ifs_tx_rx direction, > + int proto, int bytes) > +{ > + struct tag_stat *tag_stat_entry; > + tag_t tag, acct_tag; > + tag_t uid_tag; > + struct data_counters *uid_tag_counters; > + struct sock_tag *sock_tag_entry; > + struct iface_stat *iface_entry; > + struct tag_stat *new_tag_stat; > + MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s " > + "uid=%u sk=%p dir=%d proto=%d bytes=%d)\n", > + ifname, uid, sk, direction, proto, bytes); > + > + > + iface_entry = get_iface_entry(ifname); > + if (!iface_entry) { > + pr_err("qtaguid: iface_stat: stat_update() %s not found\n", > + ifname); > + return; > + } > + /* It is ok to process data when an iface_entry is inactive */ > + > + MT_DEBUG("qtaguid: iface_stat: stat_update() dev=%s entry=%p\n", > + ifname, iface_entry); > + > + /* > + * Look for a tagged sock. > + * It will have an acct_uid. > + */ > + sock_tag_entry = get_sock_stat(sk); > + if (sock_tag_entry) { > + tag = sock_tag_entry->tag; > + acct_tag = get_atag_from_tag(tag); > + uid_tag = get_utag_from_tag(tag); > + } else { > + acct_tag = make_atag_from_value(0); > + tag = combine_atag_with_uid(acct_tag, uid); > + uid_tag = make_tag_from_uid(uid); > + } > + MT_DEBUG("qtaguid: iface_stat: stat_update(): " > + " looking for tag=0x%llx (uid=%u) in ife=%p\n", > + tag, get_uid_from_tag(tag), iface_entry); > + /* Loop over tag list under this interface for {acct_tag,uid_tag} */ > + spin_lock_bh(&iface_entry->tag_stat_list_lock); > + > + tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree, > + tag); > + if (tag_stat_entry) { > + /* > + * Updating the {acct_tag, uid_tag} entry handles both stats: > + * {0, uid_tag} will also get updated. 
> + */ > + tag_stat_update(tag_stat_entry, direction, proto, bytes); > + spin_unlock_bh(&iface_entry->tag_stat_list_lock); > + return; > + } > + > + /* Loop over tag list under this interface for {0,uid_tag} */ > + tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree, > + uid_tag); > + if (!tag_stat_entry) { > + /* Here: the base uid_tag did not exist */ > + /* > + * No parent counters. So > + * - No {0, uid_tag} stats and no {acc_tag, uid_tag} stats. > + */ > + new_tag_stat = create_if_tag_stat(iface_entry, uid_tag); > + uid_tag_counters = &new_tag_stat->counters; > + } else { > + uid_tag_counters = &tag_stat_entry->counters; > + } > + > + if (acct_tag) { > + new_tag_stat = create_if_tag_stat(iface_entry, tag); > + new_tag_stat->parent_counters = uid_tag_counters; > + } > + tag_stat_update(new_tag_stat, direction, proto, bytes); > + spin_unlock_bh(&iface_entry->tag_stat_list_lock); > +} > + > +static int iface_netdev_event_handler(struct notifier_block *nb, > + unsigned long event, void *ptr) { > + struct net_device *dev = ptr; > + > + if (unlikely(module_passive)) > + return NOTIFY_DONE; > + > + IF_DEBUG("qtaguid: iface_stat: netdev_event(): " > + "ev=0x%lx/%s netdev=%p->name=%s\n", > + event, netdev_evt_str(event), dev, dev ? dev->name : ""); > + > + switch (event) { > + case NETDEV_UP: > + iface_stat_create(dev, NULL); > + atomic64_inc(&qtu_events.iface_events); > + break; > + case NETDEV_DOWN: > + case NETDEV_UNREGISTER: > + iface_stat_update(dev, event == NETDEV_DOWN); > + atomic64_inc(&qtu_events.iface_events); > + break; > + } > + return NOTIFY_DONE; > +} > + > +static int iface_inet6addr_event_handler(struct notifier_block *nb, > + unsigned long event, void *ptr) > +{ > + struct inet6_ifaddr *ifa = ptr; > + struct net_device *dev; > + > + if (unlikely(module_passive)) > + return NOTIFY_DONE; > + > + IF_DEBUG("qtaguid: iface_stat: inet6addr_event(): " > + "ev=0x%lx/%s ifa=%p\n", > + event, netdev_evt_str(event), ifa); > + > + switch (event) { > + case NETDEV_UP: > + BUG_ON(!ifa || !ifa->idev); > + dev = (struct net_device *)ifa->idev->dev; > + iface_stat_create_ipv6(dev, ifa); > + atomic64_inc(&qtu_events.iface_events); > + break; > + case NETDEV_DOWN: > + case NETDEV_UNREGISTER: > + BUG_ON(!ifa || !ifa->idev); > + dev = (struct net_device *)ifa->idev->dev; > + iface_stat_update(dev, event == NETDEV_DOWN); > + atomic64_inc(&qtu_events.iface_events); > + break; > + } > + return NOTIFY_DONE; > +} > + > +static int iface_inetaddr_event_handler(struct notifier_block *nb, > + unsigned long event, void *ptr) > +{ > + struct in_ifaddr *ifa = ptr; > + struct net_device *dev; > + > + if (unlikely(module_passive)) > + return NOTIFY_DONE; > + > + IF_DEBUG("qtaguid: iface_stat: inetaddr_event(): " > + "ev=0x%lx/%s ifa=%p\n", > + event, netdev_evt_str(event), ifa); > + > + switch (event) { > + case NETDEV_UP: > + BUG_ON(!ifa || !ifa->ifa_dev); > + dev = ifa->ifa_dev->dev; > + iface_stat_create(dev, ifa); > + atomic64_inc(&qtu_events.iface_events); > + break; > + case NETDEV_DOWN: > + case NETDEV_UNREGISTER: > + BUG_ON(!ifa || !ifa->ifa_dev); > + dev = ifa->ifa_dev->dev; > + iface_stat_update(dev, event == NETDEV_DOWN); > + atomic64_inc(&qtu_events.iface_events); > + break; > + } > + return NOTIFY_DONE; > +} > + > +static struct notifier_block iface_netdev_notifier_blk = { > + .notifier_call = iface_netdev_event_handler, > +}; > + > +static struct notifier_block iface_inetaddr_notifier_blk = { > + .notifier_call = iface_inetaddr_event_handler, > +}; > + > +static struct 
notifier_block iface_inet6addr_notifier_blk = { > + .notifier_call = iface_inet6addr_event_handler, > +}; > + > +static int __init iface_stat_init(struct proc_dir_entry *parent_procdir) > +{ > + int err; > + > + iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir); > + if (!iface_stat_procdir) { > + pr_err("qtaguid: iface_stat: init failed to create proc entry\n"); > + err = -1; > + goto err; > + } > + > + iface_stat_all_procfile = create_proc_entry(iface_stat_all_procfilename, > + proc_iface_perms, > + parent_procdir); > + if (!iface_stat_all_procfile) { > + pr_err("qtaguid: iface_stat: init " > + " failed to create stat_all proc entry\n"); > + err = -1; > + goto err_zap_entry; > + } > + iface_stat_all_procfile->read_proc = iface_stat_all_proc_read; > + > + > + err = register_netdevice_notifier(&iface_netdev_notifier_blk); > + if (err) { > + pr_err("qtaguid: iface_stat: init " > + "failed to register dev event handler\n"); > + goto err_zap_all_stats_entry; > + } > + err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk); > + if (err) { > + pr_err("qtaguid: iface_stat: init " > + "failed to register ipv4 dev event handler\n"); > + goto err_unreg_nd; > + } > + > + err = register_inet6addr_notifier(&iface_inet6addr_notifier_blk); > + if (err) { > + pr_err("qtaguid: iface_stat: init " > + "failed to register ipv6 dev event handler\n"); > + goto err_unreg_ip4_addr; > + } > + return 0; > + > +err_unreg_ip4_addr: > + unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk); > +err_unreg_nd: > + unregister_netdevice_notifier(&iface_netdev_notifier_blk); > +err_zap_all_stats_entry: > + remove_proc_entry(iface_stat_all_procfilename, parent_procdir); > +err_zap_entry: > + remove_proc_entry(iface_stat_procdirname, parent_procdir); > +err: > + return err; > +} > + > +static struct sock *qtaguid_find_sk(const struct sk_buff *skb, > + struct xt_action_param *par) > +{ > + struct sock *sk; > + unsigned int hook_mask = (1 << par->hooknum); > + > + MT_DEBUG("qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb, > + par->hooknum, par->family); > + > + /* > + * Let's not abuse the the xt_socket_get*_sk(), or else it will > + * return garbage SKs. > + */ > + if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS)) > + return NULL; > + > + switch (par->family) { > + case NFPROTO_IPV6: > + sk = xt_socket_get6_sk(skb, par); > + break; > + case NFPROTO_IPV4: > + sk = xt_socket_get4_sk(skb, par); > + break; > + default: > + return NULL; > + } > + > + /* > + * Seems to be issues on the file ptr for TCP_TIME_WAIT SKs. > + * http://kerneltrap.org/mailarchive/linux-netdev/2010/10/21/6287959 > + * Not fixed in 3.0-r3 :( > + */ Is it fixed now? > + if (sk) { > + MT_DEBUG("qtaguid: %p->sk_proto=%u " > + "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state); > + if (sk->sk_state == TCP_TIME_WAIT) { > + xt_socket_put_sk(sk); > + sk = NULL; > + } > + } > + return sk; > +} > + > +static void account_for_uid(const struct sk_buff *skb, > + const struct sock *alternate_sk, uid_t uid, > + struct xt_action_param *par) > +{ > + const struct net_device *el_dev; > + > + if (!skb->dev) { > + MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum); > + el_dev = par->in ? : par->out; > + } else { > + const struct net_device *other_dev; > + el_dev = skb->dev; > + other_dev = par->in ? 
: par->out; > + if (el_dev != other_dev) { > + MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs " > + "par->(in/out)=%p %s\n", > + par->hooknum, el_dev, el_dev->name, other_dev, > + other_dev->name); > + } > + } > + > + if (unlikely(!el_dev)) { > + pr_info("qtaguid[%d]: no par->in/out?!!\n", par->hooknum); > + } else if (unlikely(!el_dev->name)) { > + pr_info("qtaguid[%d]: no dev->name?!!\n", par->hooknum); > + } else { > + MT_DEBUG("qtaguid[%d]: dev name=%s type=%d\n", > + par->hooknum, > + el_dev->name, > + el_dev->type); > + > + if_tag_stat_update(el_dev->name, uid, > + skb->sk ? skb->sk : alternate_sk, > + par->in ? IFS_RX : IFS_TX, > + ip_hdr(skb)->protocol, skb->len); > + } > +} > + > +static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par) > +{ > + const struct xt_qtaguid_match_info *info = par->matchinfo; > + const struct file *filp; > + bool got_sock = false; > + struct sock *sk; > + uid_t sock_uid; > + bool res; > + > + if (unlikely(module_passive)) > + return (info->match ^ info->invert) == 0; > + > + MT_DEBUG("qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n", > + par->hooknum, skb, par->in, par->out, par->family); > + > + atomic64_inc(&qtu_events.match_calls); > + if (skb == NULL) { > + res = (info->match ^ info->invert) == 0; > + goto ret_res; > + } > + > + sk = skb->sk; > + > + if (sk == NULL) { > + /* > + * A missing sk->sk_socket happens when packets are in-flight > + * and the matching socket is already closed and gone. > + */ > + sk = qtaguid_find_sk(skb, par); > + /* > + * If we got the socket from the find_sk(), we will need to put > + * it back, as nf_tproxy_get_sock_v4() got it. > + */ > + got_sock = sk; > + if (sk) > + atomic64_inc(&qtu_events.match_found_sk_in_ct); > + else > + atomic64_inc(&qtu_events.match_found_no_sk_in_ct); > + } else { > + atomic64_inc(&qtu_events.match_found_sk); > + } > + MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d proto=%d\n", > + par->hooknum, sk, got_sock, ip_hdr(skb)->protocol); > + if (sk != NULL) { > + MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n", > + par->hooknum, sk, sk->sk_socket, > + sk->sk_socket ? sk->sk_socket->file : (void *)-1LL); > + filp = sk->sk_socket ? sk->sk_socket->file : NULL; > + MT_DEBUG("qtaguid[%d]: filp...uid=%u\n", > + par->hooknum, filp ? filp->f_cred->fsuid : -1); > + } > + > + if (sk == NULL || sk->sk_socket == NULL) { > + /* > + * Here, the qtaguid_find_sk() using connection tracking > + * couldn't find the owner, so for now we just count them > + * against the system. > + */ > + /* > + * TODO: unhack how to force just accounting. > + * For now we only do iface stats when the uid-owner is not > + * requested. > + */ > + if (!(info->match & XT_QTAGUID_UID)) > + account_for_uid(skb, sk, 0, par); > + MT_DEBUG("qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n", > + par->hooknum, > + sk ? sk->sk_socket : NULL); > + res = (info->match ^ info->invert) == 0; > + atomic64_inc(&qtu_events.match_no_sk); > + goto put_sock_ret_res; > + } else if (info->match & info->invert & XT_QTAGUID_SOCKET) { > + res = false; > + goto put_sock_ret_res; > + } > + filp = sk->sk_socket->file; > + if (filp == NULL) { > + MT_DEBUG("qtaguid[%d]: leaving filp=NULL\n", par->hooknum); > + account_for_uid(skb, sk, 0, par); > + res = ((info->match ^ info->invert) & > + (XT_QTAGUID_UID | XT_QTAGUID_GID)) == 0; > + atomic64_inc(&qtu_events.match_no_sk_file); > + goto put_sock_ret_res; > + } > + sock_uid = filp->f_cred->fsuid; > + /* > + * TODO: unhack how to force just accounting. 
> + * For now we only do iface stats when the uid-owner is not requested > + */ > + if (!(info->match & XT_QTAGUID_UID)) > + account_for_uid(skb, sk, sock_uid, par); > + > + /* > + * The following two tests fail the match when: > + * id not in range AND no inverted condition requested > + * or id in range AND inverted condition requested > + * Thus (!a && b) || (a && !b) == a ^ b > + */ > + if (info->match & XT_QTAGUID_UID) > + if ((filp->f_cred->fsuid >= info->uid_min && > + filp->f_cred->fsuid <= info->uid_max) ^ > + !(info->invert & XT_QTAGUID_UID)) { > + MT_DEBUG("qtaguid[%d]: leaving uid not matching\n", > + par->hooknum); > + res = false; > + goto put_sock_ret_res; > + } > + if (info->match & XT_QTAGUID_GID) > + if ((filp->f_cred->fsgid >= info->gid_min && > + filp->f_cred->fsgid <= info->gid_max) ^ > + !(info->invert & XT_QTAGUID_GID)) { > + MT_DEBUG("qtaguid[%d]: leaving gid not matching\n", > + par->hooknum); > + res = false; > + goto put_sock_ret_res; > + } > + > + MT_DEBUG("qtaguid[%d]: leaving matched\n", par->hooknum); > + res = true; > + > +put_sock_ret_res: > + if (got_sock) > + xt_socket_put_sk(sk); > +ret_res: > + MT_DEBUG("qtaguid[%d]: left %d\n", par->hooknum, res); > + return res; > +} > + > +#ifdef DDEBUG > +/* This function is not in xt_qtaguid_print.c because of locks visibility */ > +static void prdebug_full_state(int indent_level, const char *fmt, ...) > +{ > + va_list args; > + char *fmt_buff; > + char *buff; > + > + if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) > + return; > + > + fmt_buff = kasprintf(GFP_ATOMIC, > + "qtaguid: %s(): %s {\n", __func__, fmt); > + BUG_ON(!fmt_buff); > + va_start(args, fmt); > + buff = kvasprintf(GFP_ATOMIC, > + fmt_buff, args); > + BUG_ON(!buff); > + pr_debug("%s", buff); > + kfree(fmt_buff); > + kfree(buff); > + va_end(args); > + > + spin_lock_bh(&sock_tag_list_lock); > + prdebug_sock_tag_tree(indent_level, &sock_tag_tree); > + spin_unlock_bh(&sock_tag_list_lock); > + > + spin_lock_bh(&sock_tag_list_lock); > + spin_lock_bh(&uid_tag_data_tree_lock); > + prdebug_uid_tag_data_tree(indent_level, &uid_tag_data_tree); > + prdebug_proc_qtu_data_tree(indent_level, &proc_qtu_data_tree); > + spin_unlock_bh(&uid_tag_data_tree_lock); > + spin_unlock_bh(&sock_tag_list_lock); > + > + spin_lock_bh(&iface_stat_list_lock); > + prdebug_iface_stat_list(indent_level, &iface_stat_list); > + spin_unlock_bh(&iface_stat_list_lock); > + > + pr_debug("qtaguid: %s(): }\n", __func__); > +} > +#else > +static void prdebug_full_state(int indent_level, const char *fmt, ...) 
{} > +#endif > + > +/* > + * Procfs reader to get all active socket tags using style "1)" as described in > + * fs/proc/generic.c > + */ > +static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned, > + off_t items_to_skip, int char_count, int *eof, > + void *data) > +{ > + char *outp = page; > + int len; > + uid_t uid; > + struct rb_node *node; > + struct sock_tag *sock_tag_entry; > + int item_index = 0; > + int indent_level = 0; > + long f_count; > + > + if (unlikely(module_passive)) { > + *eof = 1; > + return 0; > + } > + > + if (*eof) > + return 0; > + > + CT_DEBUG("qtaguid: proc ctrl page=%p off=%ld char_count=%d *eof=%d\n", > + page, items_to_skip, char_count, *eof); > + > + spin_lock_bh(&sock_tag_list_lock); > + for (node = rb_first(&sock_tag_tree); > + node; > + node = rb_next(node)) { > + if (item_index++ < items_to_skip) > + continue; > + sock_tag_entry = rb_entry(node, struct sock_tag, sock_node); > + uid = get_uid_from_tag(sock_tag_entry->tag); > + CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u) " > + "pid=%u\n", > + sock_tag_entry->sk, > + sock_tag_entry->tag, > + uid, > + sock_tag_entry->pid > + ); > + f_count = atomic_long_read( > + &sock_tag_entry->socket->file->f_count); > + len = snprintf(outp, char_count, > + "sock=%p tag=0x%llx (uid=%u) pid=%u " > + "f_count=%lu\n", > + sock_tag_entry->sk, > + sock_tag_entry->tag, uid, > + sock_tag_entry->pid, f_count); > + if (len >= char_count) { > + spin_unlock_bh(&sock_tag_list_lock); > + *outp = '\0'; > + return outp - page; > + } > + outp += len; > + char_count -= len; > + (*num_items_returned)++; > + } > + spin_unlock_bh(&sock_tag_list_lock); > + > + if (item_index++ >= items_to_skip) { > + len = snprintf(outp, char_count, > + "events: sockets_tagged=%llu " > + "sockets_untagged=%llu " > + "counter_set_changes=%llu " > + "delete_cmds=%llu " > + "iface_events=%llu " > + "match_calls=%llu " > + "match_found_sk=%llu " > + "match_found_sk_in_ct=%llu " > + "match_found_no_sk_in_ct=%llu " > + "match_no_sk=%llu " > + "match_no_sk_file=%llu\n", > + atomic64_read(&qtu_events.sockets_tagged), > + atomic64_read(&qtu_events.sockets_untagged), > + atomic64_read(&qtu_events.counter_set_changes), > + atomic64_read(&qtu_events.delete_cmds), > + atomic64_read(&qtu_events.iface_events), > + atomic64_read(&qtu_events.match_calls), > + atomic64_read(&qtu_events.match_found_sk), > + atomic64_read(&qtu_events.match_found_sk_in_ct), > + atomic64_read( > + &qtu_events.match_found_no_sk_in_ct), > + atomic64_read(&qtu_events.match_no_sk), > + atomic64_read(&qtu_events.match_no_sk_file)); > + if (len >= char_count) { > + *outp = '\0'; > + return outp - page; > + } > + outp += len; > + char_count -= len; > + (*num_items_returned)++; > + } > + > + /* Count the following as part of the last item_index */ > + if (item_index > items_to_skip) { > + prdebug_full_state(indent_level, "proc ctrl"); > + } > + > + *eof = 1; > + return outp - page; > +} > + > +/* > + * Delete socket tags, and stat tags associated with a given > + * accouting tag and uid. 
> + */ > +static int ctrl_cmd_delete(const char *input) > +{ > + char cmd; > + uid_t uid; > + uid_t entry_uid; > + tag_t acct_tag; > + tag_t tag; > + int res, argc; > + struct iface_stat *iface_entry; > + struct rb_node *node; > + struct sock_tag *st_entry; > + struct rb_root st_to_free_tree = RB_ROOT; > + struct tag_stat *ts_entry; > + struct tag_counter_set *tcs_entry; > + struct tag_ref *tr_entry; > + struct uid_tag_data *utd_entry; > + > + argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid); > + CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c " > + "user_tag=0x%llx uid=%u\n", input, argc, cmd, > + acct_tag, uid); > + if (argc < 2) { > + res = -EINVAL; > + goto err; > + } > + if (!valid_atag(acct_tag)) { > + pr_info("qtaguid: ctrl_delete(%s): invalid tag\n", input); > + res = -EINVAL; > + goto err; > + } > + if (argc < 3) { > + uid = current_fsuid(); > + } else if (!can_impersonate_uid(uid)) { > + pr_info("qtaguid: ctrl_delete(%s): " > + "insufficient priv from pid=%u tgid=%u uid=%u\n", > + input, current->pid, current->tgid, current_fsuid()); > + res = -EPERM; > + goto err; > + } > + > + tag = combine_atag_with_uid(acct_tag, uid); > + CT_DEBUG("qtaguid: ctrl_delete(%s): " > + "looking for tag=0x%llx (uid=%u)\n", > + input, tag, uid); > + > + /* Delete socket tags */ > + spin_lock_bh(&sock_tag_list_lock); > + node = rb_first(&sock_tag_tree); > + while (node) { > + st_entry = rb_entry(node, struct sock_tag, sock_node); > + entry_uid = get_uid_from_tag(st_entry->tag); > + node = rb_next(node); > + if (entry_uid != uid) > + continue; > + > + CT_DEBUG("qtaguid: ctrl_delete(%s): st tag=0x%llx (uid=%u)\n", > + input, st_entry->tag, entry_uid); > + > + if (!acct_tag || st_entry->tag == tag) { > + rb_erase(&st_entry->sock_node, &sock_tag_tree); > + /* Can't sockfd_put() within spinlock, do it later. */ > + sock_tag_tree_insert(st_entry, &st_to_free_tree); > + tr_entry = lookup_tag_ref(st_entry->tag, NULL); > + BUG_ON(tr_entry->num_sock_tags <= 0); > + tr_entry->num_sock_tags--; > + /* > + * TODO: remove if, and start failing. > + * This is a hack to work around the fact that in some > + * places we have "if (IS_ERR_OR_NULL(pqd_entry))" > + * and are trying to work around apps > + * that didn't open the /dev/xt_qtaguid. > + */ > + if (st_entry->list.next && st_entry->list.prev) > + list_del(&st_entry->list); > + } > + } > + spin_unlock_bh(&sock_tag_list_lock); > + > + sock_tag_tree_erase(&st_to_free_tree); > + > + /* Delete tag counter-sets */ > + spin_lock_bh(&tag_counter_set_list_lock); > + /* Counter sets are only on the uid tag, not full tag */ > + tcs_entry = tag_counter_set_tree_search(&tag_counter_set_tree, tag); > + if (tcs_entry) { > + CT_DEBUG("qtaguid: ctrl_delete(%s): " > + "erase tcs: tag=0x%llx (uid=%u) set=%d\n", > + input, > + tcs_entry->tn.tag, > + get_uid_from_tag(tcs_entry->tn.tag), > + tcs_entry->active_set); > + rb_erase(&tcs_entry->tn.node, &tag_counter_set_tree); > + kfree(tcs_entry); > + } > + spin_unlock_bh(&tag_counter_set_list_lock); > + > + /* > + * If acct_tag is 0, then all entries belonging to uid are > + * erased. 
> + */ > + spin_lock_bh(&iface_stat_list_lock); > + list_for_each_entry(iface_entry, &iface_stat_list, list) { > + spin_lock_bh(&iface_entry->tag_stat_list_lock); > + node = rb_first(&iface_entry->tag_stat_tree); > + while (node) { > + ts_entry = rb_entry(node, struct tag_stat, tn.node); > + entry_uid = get_uid_from_tag(ts_entry->tn.tag); > + node = rb_next(node); > + > + CT_DEBUG("qtaguid: ctrl_delete(%s): " > + "ts tag=0x%llx (uid=%u)\n", > + input, ts_entry->tn.tag, entry_uid); > + > + if (entry_uid != uid) > + continue; > + if (!acct_tag || ts_entry->tn.tag == tag) { > + CT_DEBUG("qtaguid: ctrl_delete(%s): " > + "erase ts: %s 0x%llx %u\n", > + input, iface_entry->ifname, > + get_atag_from_tag(ts_entry->tn.tag), > + entry_uid); > + rb_erase(&ts_entry->tn.node, > + &iface_entry->tag_stat_tree); > + kfree(ts_entry); > + } > + } > + spin_unlock_bh(&iface_entry->tag_stat_list_lock); > + } > + spin_unlock_bh(&iface_stat_list_lock); > + > + /* Cleanup the uid_tag_data */ > + spin_lock_bh(&uid_tag_data_tree_lock); > + node = rb_first(&uid_tag_data_tree); > + while (node) { > + utd_entry = rb_entry(node, struct uid_tag_data, node); > + entry_uid = utd_entry->uid; > + node = rb_next(node); > + > + CT_DEBUG("qtaguid: ctrl_delete(%s): " > + "utd uid=%u\n", > + input, entry_uid); > + > + if (entry_uid != uid) > + continue; > + /* > + * Go over the tag_refs, and those that don't have > + * sock_tags using them are freed. > + */ > + put_tag_ref_tree(tag, utd_entry); > + put_utd_entry(utd_entry); > + } > + spin_unlock_bh(&uid_tag_data_tree_lock); > + > + atomic64_inc(&qtu_events.delete_cmds); > + res = 0; > + > +err: > + return res; > +} > + > +static int ctrl_cmd_counter_set(const char *input) > +{ > + char cmd; > + uid_t uid = 0; > + tag_t tag; > + int res, argc; > + struct tag_counter_set *tcs; > + int counter_set; > + > + argc = sscanf(input, "%c %d %u", &cmd, &counter_set, &uid); > + CT_DEBUG("qtaguid: ctrl_counterset(%s): argc=%d cmd=%c " > + "set=%d uid=%u\n", input, argc, cmd, > + counter_set, uid); > + if (argc != 3) { > + res = -EINVAL; > + goto err; > + } > + if (counter_set < 0 || counter_set >= IFS_MAX_COUNTER_SETS) { > + pr_info("qtaguid: ctrl_counterset(%s): invalid counter_set range\n", > + input); > + res = -EINVAL; > + goto err; > + } > + if (!can_manipulate_uids()) { > + pr_info("qtaguid: ctrl_counterset(%s): " > + "insufficient priv from pid=%u tgid=%u uid=%u\n", > + input, current->pid, current->tgid, current_fsuid()); > + res = -EPERM; > + goto err; > + } > + > + tag = make_tag_from_uid(uid); > + spin_lock_bh(&tag_counter_set_list_lock); > + tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag); > + if (!tcs) { > + tcs = kzalloc(sizeof(*tcs), GFP_ATOMIC); > + if (!tcs) { > + spin_unlock_bh(&tag_counter_set_list_lock); > + pr_err("qtaguid: ctrl_counterset(%s): " > + "failed to alloc counter set\n", > + input); > + res = -ENOMEM; > + goto err; > + } > + tcs->tn.tag = tag; > + tag_counter_set_tree_insert(tcs, &tag_counter_set_tree); > + CT_DEBUG("qtaguid: ctrl_counterset(%s): added tcs tag=0x%llx " > + "(uid=%u) set=%d\n", > + input, tag, get_uid_from_tag(tag), counter_set); > + } > + tcs->active_set = counter_set; > + spin_unlock_bh(&tag_counter_set_list_lock); > + atomic64_inc(&qtu_events.counter_set_changes); > + res = 0; > + > +err: > + return res; > +} > + > +static int ctrl_cmd_tag(const char *input) > +{ > + char cmd; > + int sock_fd = 0; > + uid_t uid = 0; > + tag_t acct_tag = make_atag_from_value(0); > + tag_t full_tag; > + struct socket *el_socket; > + int res, 
argc; > + struct sock_tag *sock_tag_entry; > + struct tag_ref *tag_ref_entry; > + struct uid_tag_data *uid_tag_data_entry; > + struct proc_qtu_data *pqd_entry; > + > + /* Unassigned args will get defaulted later. */ > + argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid); > + CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d " > + "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd, > + acct_tag, uid); > + if (argc < 2) { > + res = -EINVAL; > + goto err; > + } > + el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */ > + if (!el_socket) { > + pr_info("qtaguid: ctrl_tag(%s): failed to lookup" > + " sock_fd=%d err=%d\n", input, sock_fd, res); > + goto err; > + } > + CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%ld ->sk=%p\n", > + input, atomic_long_read(&el_socket->file->f_count), > + el_socket->sk); > + if (argc < 3) { > + acct_tag = make_atag_from_value(0); > + } else if (!valid_atag(acct_tag)) { > + pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input); > + res = -EINVAL; > + goto err_put; > + } > + CT_DEBUG("qtaguid: ctrl_tag(%s): " > + "pid=%u tgid=%u uid=%u euid=%u fsuid=%u " > + "in_group=%d in_egroup=%d\n", > + input, current->pid, current->tgid, current_uid(), > + current_euid(), current_fsuid(), > + in_group_p(proc_ctrl_write_gid), > + in_egroup_p(proc_ctrl_write_gid)); > + if (argc < 4) { > + uid = current_fsuid(); > + } else if (!can_impersonate_uid(uid)) { > + pr_info("qtaguid: ctrl_tag(%s): " > + "insufficient priv from pid=%u tgid=%u uid=%u\n", > + input, current->pid, current->tgid, current_fsuid()); > + res = -EPERM; > + goto err_put; > + } > + full_tag = combine_atag_with_uid(acct_tag, uid); > + > + spin_lock_bh(&sock_tag_list_lock); > + sock_tag_entry = get_sock_stat_nl(el_socket->sk); > + tag_ref_entry = get_tag_ref(full_tag, &uid_tag_data_entry); > + if (IS_ERR(tag_ref_entry)) { > + res = PTR_ERR(tag_ref_entry); > + spin_unlock_bh(&sock_tag_list_lock); > + goto err_put; > + } > + tag_ref_entry->num_sock_tags++; > + if (sock_tag_entry) { > + struct tag_ref *prev_tag_ref_entry; > + > + CT_DEBUG("qtaguid: ctrl_tag(%s): retag for sk=%p " > + "st@%p ...->f_count=%ld\n", > + input, el_socket->sk, sock_tag_entry, > + atomic_long_read(&el_socket->file->f_count)); > + /* > + * This is a re-tagging, so release the sock_fd that was > + * locked at the time of the 1st tagging. > + * There is still the ref from this call's sockfd_lookup() so > + * it can be done within the spinlock. > + */ > + sockfd_put(sock_tag_entry->socket); > + prev_tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, > + &uid_tag_data_entry); > + BUG_ON(IS_ERR_OR_NULL(prev_tag_ref_entry)); > + BUG_ON(prev_tag_ref_entry->num_sock_tags <= 0); > + prev_tag_ref_entry->num_sock_tags--; > + sock_tag_entry->tag = full_tag; > + } else { > + CT_DEBUG("qtaguid: ctrl_tag(%s): newtag for sk=%p\n", > + input, el_socket->sk); > + sock_tag_entry = kzalloc(sizeof(*sock_tag_entry), > + GFP_ATOMIC); > + if (!sock_tag_entry) { > + pr_err("qtaguid: ctrl_tag(%s): " > + "socket tag alloc failed\n", > + input); > + spin_unlock_bh(&sock_tag_list_lock); > + res = -ENOMEM; > + goto err_tag_unref_put; > + } > + sock_tag_entry->sk = el_socket->sk; > + sock_tag_entry->socket = el_socket; > + sock_tag_entry->pid = current->tgid; > + sock_tag_entry->tag = combine_atag_with_uid(acct_tag, > + uid); > + spin_lock_bh(&uid_tag_data_tree_lock); > + pqd_entry = proc_qtu_data_tree_search( > + &proc_qtu_data_tree, current->tgid); > + /* > + * TODO: remove if, and start failing. 
> + * At first, we want to catch user-space code that is not > + * opening the /dev/xt_qtaguid. > + */ > + if (IS_ERR_OR_NULL(pqd_entry)) > + pr_warn_once( > + "qtaguid: %s(): " > + "User space forgot to open /dev/xt_qtaguid? " > + "pid=%u tgid=%u uid=%u\n", __func__, > + current->pid, current->tgid, > + current_fsuid()); > + else > + list_add(&sock_tag_entry->list, > + &pqd_entry->sock_tag_list); > + spin_unlock_bh(&uid_tag_data_tree_lock); > + > + sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree); > + atomic64_inc(&qtu_events.sockets_tagged); > + } > + spin_unlock_bh(&sock_tag_list_lock); > + /* We keep the ref to the socket (file) until it is untagged */ > + CT_DEBUG("qtaguid: ctrl_tag(%s): done st@%p ...->f_count=%ld\n", > + input, sock_tag_entry, > + atomic_long_read(&el_socket->file->f_count)); > + return 0; > + > +err_tag_unref_put: > + BUG_ON(tag_ref_entry->num_sock_tags <= 0); > + tag_ref_entry->num_sock_tags--; > + free_tag_ref_from_utd_entry(tag_ref_entry, uid_tag_data_entry); > +err_put: > + CT_DEBUG("qtaguid: ctrl_tag(%s): done. ...->f_count=%ld\n", > + input, atomic_long_read(&el_socket->file->f_count) - 1); > + /* Release the sock_fd that was grabbed by sockfd_lookup(). */ > + sockfd_put(el_socket); > + return res; > + > +err: > + CT_DEBUG("qtaguid: ctrl_tag(%s): done.\n", input); > + return res; > +} > + > +static int ctrl_cmd_untag(const char *input) > +{ > + char cmd; > + int sock_fd = 0; > + struct socket *el_socket; > + int res, argc; > + struct sock_tag *sock_tag_entry; > + struct tag_ref *tag_ref_entry; > + struct uid_tag_data *utd_entry; > + struct proc_qtu_data *pqd_entry; > + > + argc = sscanf(input, "%c %d", &cmd, &sock_fd); > + CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n", > + input, argc, cmd, sock_fd); > + if (argc < 2) { > + res = -EINVAL; > + goto err; > + } > + el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */ > + if (!el_socket) { > + pr_info("qtaguid: ctrl_untag(%s): failed to lookup" > + " sock_fd=%d err=%d\n", input, sock_fd, res); > + goto err; > + } > + CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n", > + input, atomic_long_read(&el_socket->file->f_count), > + el_socket->sk); > + spin_lock_bh(&sock_tag_list_lock); > + sock_tag_entry = get_sock_stat_nl(el_socket->sk); > + if (!sock_tag_entry) { > + spin_unlock_bh(&sock_tag_list_lock); > + res = -EINVAL; > + goto err_put; > + } > + /* > + * The socket already belongs to the current process > + * so it can do whatever it wants to it. > + */ > + rb_erase(&sock_tag_entry->sock_node, &sock_tag_tree); > + > + tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, &utd_entry); > + BUG_ON(!tag_ref_entry); > + BUG_ON(tag_ref_entry->num_sock_tags <= 0); > + spin_lock_bh(&uid_tag_data_tree_lock); > + pqd_entry = proc_qtu_data_tree_search( > + &proc_qtu_data_tree, current->tgid); > + /* > + * TODO: remove if, and start failing. > + * At first, we want to catch user-space code that is not > + * opening the /dev/xt_qtaguid. > + */ > + if (IS_ERR_OR_NULL(pqd_entry)) > + pr_warn_once("qtaguid: %s(): " > + "User space forgot to open /dev/xt_qtaguid? " > + "pid=%u tgid=%u uid=%u\n", __func__, > + current->pid, current->tgid, current_fsuid()); > + else > + list_del(&sock_tag_entry->list); > + spin_unlock_bh(&uid_tag_data_tree_lock); > + /* > + * We don't free tag_ref from the utd_entry here, > + * only during a cmd_delete(). 
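To make the intended user-space sequence concrete, here is my reading of the sscanf() formats for 't' and 'u' plus the "forgot to open /dev/xt_qtaguid" warnings; the helper names and error handling are invented, and I'm assuming the misc device shows up as /dev/xt_qtaguid:

        #include <fcntl.h>
        #include <stdio.h>
        #include <string.h>
        #include <unistd.h>

        static int qtu_fd = -1;   /* kept open so qtudev_release() can clean up */

        /* Write one command line to the qtaguid control file. */
        static int qtaguid_ctrl(const char *cmd)
        {
                int fd = open("/proc/net/xt_qtaguid/ctrl", O_WRONLY);
                ssize_t n;

                if (fd < 0)
                        return -1;
                n = write(fd, cmd, strlen(cmd));
                close(fd);
                return n < 0 ? -1 : 0;
        }

        /* "t <sock_fd> <acct_tag> <uid>": bill sock_fd's traffic to that tag. */
        static int tag_socket(int sock_fd, unsigned int atag_value, unsigned int uid)
        {
                char buf[64];

                if (qtu_fd < 0)   /* avoids the pr_warn_once() above */
                        qtu_fd = open("/dev/xt_qtaguid", O_RDONLY);
                snprintf(buf, sizeof(buf), "t %d %llu %u", sock_fd,
                         (unsigned long long)atag_value << 32, uid);
                return qtaguid_ctrl(buf);
        }

        /* "u <sock_fd>": remove the tag; the module drops its file reference. */
        static int untag_socket(int sock_fd)
        {
                char buf[32];

                snprintf(buf, sizeof(buf), "u %d", sock_fd);
                return qtaguid_ctrl(buf);
        }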
> + */ > + tag_ref_entry->num_sock_tags--; > + spin_unlock_bh(&sock_tag_list_lock); > + /* > + * Release the sock_fd that was grabbed at tag time, > + * and once more for the sockfd_lookup() here. > + */ > + sockfd_put(sock_tag_entry->socket); > + CT_DEBUG("qtaguid: ctrl_untag(%s): done. st@%p ...->f_count=%ld\n", > + input, sock_tag_entry, > + atomic_long_read(&el_socket->file->f_count) - 1); > + sockfd_put(el_socket); > + > + kfree(sock_tag_entry); > + atomic64_inc(&qtu_events.sockets_untagged); > + > + return 0; > + > +err_put: > + CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%ld\n", > + input, atomic_long_read(&el_socket->file->f_count) - 1); > + /* Release the sock_fd that was grabbed by sockfd_lookup(). */ > + sockfd_put(el_socket); > + return res; > + > +err: > + CT_DEBUG("qtaguid: ctrl_untag(%s): done.\n", input); > + return res; > +} > + > +static int qtaguid_ctrl_parse(const char *input, int count) > +{ > + char cmd; > + int res; > + > + cmd = input[0]; > + /* Collect params for commands */ > + switch (cmd) { > + case 'd': > + res = ctrl_cmd_delete(input); > + break; > + > + case 's': > + res = ctrl_cmd_counter_set(input); > + break; > + > + case 't': > + res = ctrl_cmd_tag(input); > + break; > + > + case 'u': > + res = ctrl_cmd_untag(input); > + break; > + > + default: > + res = -EINVAL; > + goto err; > + } > + if (!res) > + res = count; > +err: > + CT_DEBUG("qtaguid: ctrl(%s): res=%d\n", input, res); > + return res; > +} > + > +#define MAX_QTAGUID_CTRL_INPUT_LEN 255 > +static int qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer, > + unsigned long count, void *data) > +{ > + char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN]; > + > + if (unlikely(module_passive)) > + return count; > + > + if (count >= MAX_QTAGUID_CTRL_INPUT_LEN) > + return -EINVAL; > + > + if (copy_from_user(input_buf, buffer, count)) > + return -EFAULT; > + > + input_buf[count] = '\0'; > + return qtaguid_ctrl_parse(input_buf, count); > +} > + > +struct proc_print_info { > + char *outp; > + char **num_items_returned; > + struct iface_stat *iface_entry; > + struct tag_stat *ts_entry; > + int item_index; > + int items_to_skip; > + int char_count; > +}; > + > +static int pp_stats_line(struct proc_print_info *ppi, int cnt_set) > +{ > + int len; > + struct data_counters *cnts; > + > + if (!ppi->item_index) { > + if (ppi->item_index++ < ppi->items_to_skip) > + return 0; > + len = snprintf(ppi->outp, ppi->char_count, > + "idx iface acct_tag_hex uid_tag_int cnt_set " > + "rx_bytes rx_packets " > + "tx_bytes tx_packets " > + "rx_tcp_bytes rx_tcp_packets " > + "rx_udp_bytes rx_udp_packets " > + "rx_other_bytes rx_other_packets " > + "tx_tcp_bytes tx_tcp_packets " > + "tx_udp_bytes tx_udp_packets " > + "tx_other_bytes tx_other_packets\n"); > + } else { > + tag_t tag = ppi->ts_entry->tn.tag; > + uid_t stat_uid = get_uid_from_tag(tag); > + > + if (!can_read_other_uid_stats(stat_uid)) { > + CT_DEBUG("qtaguid: stats line: " > + "%s 0x%llx %u: insufficient priv " > + "from pid=%u tgid=%u uid=%u\n", > + ppi->iface_entry->ifname, > + get_atag_from_tag(tag), stat_uid, > + current->pid, current->tgid, current_fsuid()); > + return 0; > + } > + if (ppi->item_index++ < ppi->items_to_skip) > + return 0; > + cnts = &ppi->ts_entry->counters; > + len = snprintf( > + ppi->outp, ppi->char_count, > + "%d %s 0x%llx %u %u " > + "%llu %llu " > + "%llu %llu " > + "%llu %llu " > + "%llu %llu " > + "%llu %llu " > + "%llu %llu " > + "%llu %llu " > + "%llu %llu\n", > + ppi->item_index, > + ppi->iface_entry->ifname, > + 
get_atag_from_tag(tag), > + stat_uid, > + cnt_set, > + dc_sum_bytes(cnts, cnt_set, IFS_RX), > + dc_sum_packets(cnts, cnt_set, IFS_RX), > + dc_sum_bytes(cnts, cnt_set, IFS_TX), > + dc_sum_packets(cnts, cnt_set, IFS_TX), > + cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes, > + cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets, > + cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes, > + cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets, > + cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes, > + cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets, > + cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes, > + cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets, > + cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes, > + cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets, > + cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes, > + cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets); > + } > + return len; > +} > + > +static bool pp_sets(struct proc_print_info *ppi) > +{ > + int len; > + int counter_set; > + for (counter_set = 0; counter_set < IFS_MAX_COUNTER_SETS; > + counter_set++) { > + len = pp_stats_line(ppi, counter_set); > + if (len >= ppi->char_count) { > + *ppi->outp = '\0'; > + return false; > + } > + if (len) { > + ppi->outp += len; > + ppi->char_count -= len; > + (*ppi->num_items_returned)++; > + } > + } > + return true; > +} > + > +/* > + * Procfs reader to get all tag stats using style "1)" as described in > + * fs/proc/generic.c > + * Groups all protocols tx/rx bytes. > + */ > +static int qtaguid_stats_proc_read(char *page, char **num_items_returned, > + off_t items_to_skip, int char_count, int *eof, > + void *data) > +{ > + struct proc_print_info ppi; > + int len; > + > + ppi.outp = page; > + ppi.item_index = 0; > + ppi.char_count = char_count; > + ppi.num_items_returned = num_items_returned; > + ppi.items_to_skip = items_to_skip; > + > + if (unlikely(module_passive)) { > + len = pp_stats_line(&ppi, 0); > + /* The header should always be shorter than the buffer. */ > + BUG_ON(len >= ppi.char_count); > + (*num_items_returned)++; > + *eof = 1; > + return len; > + } > + > + CT_DEBUG("qtaguid:proc stats page=%p *num_items_returned=%p off=%ld " > + "char_count=%d *eof=%d\n", page, *num_items_returned, > + items_to_skip, char_count, *eof); > + > + if (*eof) > + return 0; > + > + /* The idx is there to help debug when things go belly up. 
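Since the stats column layout only exists as one long format string, here is a hypothetical user-space reader for the leading fields of a line, matching the "%d %s 0x%llx %u %u %llu %llu ..." emitted above; struct and function names are mine, and the remaining per-protocol columns follow the same pattern:

        #include <stdio.h>

        struct qtaguid_line {
                int idx;
                char iface[16];
                unsigned long long acct_tag;
                unsigned int uid;
                unsigned int cnt_set;
                unsigned long long rx_bytes, rx_packets, tx_bytes, tx_packets;
        };

        /* Parse "idx iface acct_tag_hex uid_tag_int cnt_set rx_bytes ..." */
        static int parse_stats_line(const char *line, struct qtaguid_line *s)
        {
                int n = sscanf(line, "%d %15s 0x%llx %u %u %llu %llu %llu %llu",
                               &s->idx, s->iface, &s->acct_tag, &s->uid,
                               &s->cnt_set, &s->rx_bytes, &s->rx_packets,
                               &s->tx_bytes, &s->tx_packets);

                return n == 9 ? 0 : -1;
        }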
*/ > + len = pp_stats_line(&ppi, 0); > + /* Don't advance the outp unless the whole line was printed */ > + if (len >= ppi.char_count) { > + *ppi.outp = '\0'; > + return ppi.outp - page; > + } > + if (len) { > + ppi.outp += len; > + ppi.char_count -= len; > + (*num_items_returned)++; > + } > + > + spin_lock_bh(&iface_stat_list_lock); > + list_for_each_entry(ppi.iface_entry, &iface_stat_list, list) { > + struct rb_node *node; > + spin_lock_bh(&ppi.iface_entry->tag_stat_list_lock); > + for (node = rb_first(&ppi.iface_entry->tag_stat_tree); > + node; > + node = rb_next(node)) { > + ppi.ts_entry = rb_entry(node, struct tag_stat, tn.node); > + if (!pp_sets(&ppi)) { > + spin_unlock_bh( > + &ppi.iface_entry->tag_stat_list_lock); > + spin_unlock_bh(&iface_stat_list_lock); > + return ppi.outp - page; > + } > + } > + spin_unlock_bh(&ppi.iface_entry->tag_stat_list_lock); > + } > + spin_unlock_bh(&iface_stat_list_lock); > + > + *eof = 1; > + return ppi.outp - page; > +} > + > +/*------------------------------------------*/ > +static int qtudev_open(struct inode *inode, struct file *file) > +{ > + struct uid_tag_data *utd_entry; > + struct proc_qtu_data *pqd_entry; > + struct proc_qtu_data *new_pqd_entry; > + int res; > + bool utd_entry_found; > + > + if (unlikely(qtu_proc_handling_passive)) > + return 0; > + > + DR_DEBUG("qtaguid: qtudev_open(): pid=%u tgid=%u uid=%u\n", > + current->pid, current->tgid, current_fsuid()); > + > + spin_lock_bh(&uid_tag_data_tree_lock); > + > + /* Look for existing uid data, or alloc one. */ > + utd_entry = get_uid_data(current_fsuid(), &utd_entry_found); > + if (IS_ERR_OR_NULL(utd_entry)) { > + res = PTR_ERR(utd_entry); > + goto err; > + } > + > + /* Look for existing PID based proc_data */ > + pqd_entry = proc_qtu_data_tree_search(&proc_qtu_data_tree, > + current->tgid); > + if (pqd_entry) { > + pr_err("qtaguid: qtudev_open(): %u/%u %u " > + "%s already opened\n", > + current->pid, current->tgid, current_fsuid(), > + QTU_DEV_NAME); > + res = -EBUSY; > + goto err_unlock_free_utd; > + } > + > + new_pqd_entry = kzalloc(sizeof(*new_pqd_entry), GFP_ATOMIC); > + if (!new_pqd_entry) { > + pr_err("qtaguid: qtudev_open(): %u/%u %u: " > + "proc data alloc failed\n", > + current->pid, current->tgid, current_fsuid()); > + res = -ENOMEM; > + goto err_unlock_free_utd; > + } > + new_pqd_entry->pid = current->tgid; > + INIT_LIST_HEAD(&new_pqd_entry->sock_tag_list); > + new_pqd_entry->parent_tag_data = utd_entry; > + utd_entry->num_pqd++; > + > + proc_qtu_data_tree_insert(new_pqd_entry, > + &proc_qtu_data_tree); > + > + spin_unlock_bh(&uid_tag_data_tree_lock); > + DR_DEBUG("qtaguid: tracking data for uid=%u in pqd=%p\n", > + current_fsuid(), new_pqd_entry); > + file->private_data = new_pqd_entry; > + return 0; > + > +err_unlock_free_utd: > + if (!utd_entry_found) { > + rb_erase(&utd_entry->node, &uid_tag_data_tree); > + kfree(utd_entry); > + } > + spin_unlock_bh(&uid_tag_data_tree_lock); > +err: > + return res; > +} > + > +static int qtudev_release(struct inode *inode, struct file *file) > +{ > + struct proc_qtu_data *pqd_entry = file->private_data; > + struct uid_tag_data *utd_entry = pqd_entry->parent_tag_data; > + struct sock_tag *st_entry; > + struct rb_root st_to_free_tree = RB_ROOT; > + struct list_head *entry, *next; > + struct tag_ref *tr; > + > + if (unlikely(qtu_proc_handling_passive)) > + return 0; > + > + /* > + * Do not trust the current->pid, it might just be a kworker cleaning > + * up after a dead proc. 
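Stepping back to qtudev_open() a little further up: proc_qtu_data is keyed on current->tgid, so a second open of the misc device from the same process should fail with EBUSY. A tiny, untested check, assuming the node is /dev/xt_qtaguid:

        #include <errno.h>
        #include <fcntl.h>
        #include <stdio.h>

        int main(void)
        {
                int a = open("/dev/xt_qtaguid", O_RDONLY);
                int b = open("/dev/xt_qtaguid", O_RDONLY);

                /* Expected with this patch: a >= 0, b == -1, errno == EBUSY */
                printf("first=%d second=%d errno=%d\n", a, b, errno);
                return 0;
        }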
> + */ > + DR_DEBUG("qtaguid: qtudev_release(): " > + "pid=%u tgid=%u uid=%u " > + "pqd_entry=%p->pid=%u utd_entry=%p->active_tags=%d\n", > + current->pid, current->tgid, pqd_entry->parent_tag_data->uid, > + pqd_entry, pqd_entry->pid, utd_entry, > + utd_entry->num_active_tags); > + > + spin_lock_bh(&sock_tag_list_lock); > + spin_lock_bh(&uid_tag_data_tree_lock); > + > + list_for_each_safe(entry, next, &pqd_entry->sock_tag_list) { > + st_entry = list_entry(entry, struct sock_tag, list); > + DR_DEBUG("qtaguid: %s(): " > + "erase sock_tag=%p->sk=%p pid=%u tgid=%u uid=%u\n", > + __func__, > + st_entry, st_entry->sk, > + current->pid, current->tgid, > + pqd_entry->parent_tag_data->uid); > + > + utd_entry = uid_tag_data_tree_search( > + &uid_tag_data_tree, > + get_uid_from_tag(st_entry->tag)); > + BUG_ON(IS_ERR_OR_NULL(utd_entry)); > + DR_DEBUG("qtaguid: %s(): " > + "looking for tag=0x%llx in utd_entry=%p\n", __func__, > + st_entry->tag, utd_entry); > + tr = tag_ref_tree_search(&utd_entry->tag_ref_tree, > + st_entry->tag); > + BUG_ON(!tr); > + BUG_ON(tr->num_sock_tags <= 0); > + tr->num_sock_tags--; > + free_tag_ref_from_utd_entry(tr, utd_entry); > + > + rb_erase(&st_entry->sock_node, &sock_tag_tree); > + list_del(&st_entry->list); > + /* Can't sockfd_put() within spinlock, do it later. */ > + sock_tag_tree_insert(st_entry, &st_to_free_tree); > + > + /* > + * Try to free the utd_entry if no other proc_qtu_data is > + * using it (num_pqd is 0) and it doesn't have active tags > + * (num_active_tags is 0). > + */ > + put_utd_entry(utd_entry); > + } > + > + rb_erase(&pqd_entry->node, &proc_qtu_data_tree); > + BUG_ON(pqd_entry->parent_tag_data->num_pqd < 1); > + pqd_entry->parent_tag_data->num_pqd--; > + put_utd_entry(pqd_entry->parent_tag_data); > + kfree(pqd_entry); > + file->private_data = NULL; > + > + spin_unlock_bh(&uid_tag_data_tree_lock); > + spin_unlock_bh(&sock_tag_list_lock); > + > + > + sock_tag_tree_erase(&st_to_free_tree); > + > + prdebug_full_state(0, "%s(): pid=%u tgid=%u", __func__, > + current->pid, current->tgid); > + return 0; > +} > + > +/*------------------------------------------*/ > +static const struct file_operations qtudev_fops = { > + .owner = THIS_MODULE, > + .open = qtudev_open, > + .release = qtudev_release, > +}; > + > +static struct miscdevice qtu_device = { > + .minor = MISC_DYNAMIC_MINOR, > + .name = QTU_DEV_NAME, > + .fops = &qtudev_fops, > + /* How sad it doesn't allow for defaults: .mode = S_IRUGO | S_IWUSR */ > +}; > + > +/*------------------------------------------*/ > +static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir) > +{ > + int ret; > + *res_procdir = proc_mkdir(module_procdirname, init_net.proc_net); > + if (!*res_procdir) { > + pr_err("qtaguid: failed to create proc/.../xt_qtaguid\n"); > + ret = -ENOMEM; > + goto no_dir; > + } > + > + xt_qtaguid_ctrl_file = create_proc_entry("ctrl", proc_ctrl_perms, > + *res_procdir); > + if (!xt_qtaguid_ctrl_file) { > + pr_err("qtaguid: failed to create xt_qtaguid/ctrl " > + " file\n"); > + ret = -ENOMEM; > + goto no_ctrl_entry; > + } > + xt_qtaguid_ctrl_file->read_proc = qtaguid_ctrl_proc_read; > + xt_qtaguid_ctrl_file->write_proc = qtaguid_ctrl_proc_write; > + > + xt_qtaguid_stats_file = create_proc_entry("stats", proc_stats_perms, > + *res_procdir); > + if (!xt_qtaguid_stats_file) { > + pr_err("qtaguid: failed to create xt_qtaguid/stats " > + "file\n"); > + ret = -ENOMEM; > + goto no_stats_entry; > + } > + xt_qtaguid_stats_file->read_proc = qtaguid_stats_proc_read; > + /* > + * TODO: add 
support counter hacking > + * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write; > + */ > + return 0; > + > +no_stats_entry: > + remove_proc_entry("ctrl", *res_procdir); > +no_ctrl_entry: > + remove_proc_entry("xt_qtaguid", NULL); > +no_dir: > + return ret; > +} > + > +static struct xt_match qtaguid_mt_reg __read_mostly = { > + /* > + * This module masquerades as the "owner" module so that iptables > + * tools can deal with it. > + */ > + .name = "owner", > + .revision = 1, > + .family = NFPROTO_UNSPEC, > + .match = qtaguid_mt, > + .matchsize = sizeof(struct xt_qtaguid_match_info), > + .me = THIS_MODULE, > +}; > + > +static int __init qtaguid_mt_init(void) > +{ > + if (qtaguid_proc_register(&xt_qtaguid_procdir) > + || iface_stat_init(xt_qtaguid_procdir) > + || xt_register_match(&qtaguid_mt_reg) > + || misc_register(&qtu_device)) > + return -1; > + return 0; > +} > + > +/* > + * TODO: allow unloading of the module. > + * For now stats are permanent. > + * Kconfig forces'y/n' and never an 'm'. > + */ > + > +module_init(qtaguid_mt_init); > +MODULE_AUTHOR("jpa <jpa@...gle.com>"); > +MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats"); > +MODULE_LICENSE("GPL"); > +MODULE_ALIAS("ipt_owner"); > +MODULE_ALIAS("ip6t_owner"); > +MODULE_ALIAS("ipt_qtaguid"); > +MODULE_ALIAS("ip6t_qtaguid"); > diff --git a/net/netfilter/xt_qtaguid_internal.h b/net/netfilter/xt_qtaguid_internal.h > new file mode 100644 > index 0000000..02479d6 > --- /dev/null > +++ b/net/netfilter/xt_qtaguid_internal.h > @@ -0,0 +1,330 @@ > +/* > + * Kernel iptables module to track stats for packets based on user tags. > + * > + * (C) 2011 Google, Inc > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License version 2 as > + * published by the Free Software Foundation. > + */ > +#ifndef __XT_QTAGUID_INTERNAL_H__ > +#define __XT_QTAGUID_INTERNAL_H__ > + > +#include <linux/types.h> > +#include <linux/rbtree.h> > +#include <linux/spinlock_types.h> > +#include <linux/workqueue.h> > + > +/* Iface handling */ > +#define IDEBUG_MASK (1<<0) > +/* Iptable Matching. Per packet. */ > +#define MDEBUG_MASK (1<<1) > +/* Red-black tree handling. Per packet. */ > +#define RDEBUG_MASK (1<<2) > +/* procfs ctrl/stats handling */ > +#define CDEBUG_MASK (1<<3) > +/* dev and resource tracking */ > +#define DDEBUG_MASK (1<<4) > + > +/* E.g (IDEBUG_MASK | CDEBUG_MASK | DDEBUG_MASK) */ > +#define DEFAULT_DEBUG_MASK 0 > + > +/* > + * (Un)Define these *DEBUG to compile out/in the pr_debug calls. > + * All undef: text size ~ 0x3030; all def: ~ 0x4404. > + */ > +#define IDEBUG > +#define MDEBUG > +#define RDEBUG > +#define CDEBUG > +#define DDEBUG > + > +#define MSK_DEBUG(mask, ...) do { \ > + if (unlikely(qtaguid_debug_mask & (mask))) \ > + pr_debug(__VA_ARGS__); \ > + } while (0) > +#ifdef IDEBUG > +#define IF_DEBUG(...) MSK_DEBUG(IDEBUG_MASK, __VA_ARGS__) > +#else > +#define IF_DEBUG(...) no_printk(__VA_ARGS__) > +#endif > +#ifdef MDEBUG > +#define MT_DEBUG(...) MSK_DEBUG(MDEBUG_MASK, __VA_ARGS__) > +#else > +#define MT_DEBUG(...) no_printk(__VA_ARGS__) > +#endif > +#ifdef RDEBUG > +#define RB_DEBUG(...) MSK_DEBUG(RDEBUG_MASK, __VA_ARGS__) > +#else > +#define RB_DEBUG(...) no_printk(__VA_ARGS__) > +#endif > +#ifdef CDEBUG > +#define CT_DEBUG(...) MSK_DEBUG(CDEBUG_MASK, __VA_ARGS__) > +#else > +#define CT_DEBUG(...) no_printk(__VA_ARGS__) > +#endif > +#ifdef DDEBUG > +#define DR_DEBUG(...) 
MSK_DEBUG(DDEBUG_MASK, __VA_ARGS__) > +#else > +#define DR_DEBUG(...) no_printk(__VA_ARGS__) > +#endif > + > +extern uint qtaguid_debug_mask; > + > +/*---------------------------------------------------------------------------*/ > +/* > + * Tags: > + * > + * They represent what the data usage counters will be tracked against. > + * By default a tag is just based on the UID. > + * The UID is used as the base for policing, and can not be ignored. > + * So a tag will always at least represent a UID (uid_tag). > + * > + * A tag can be augmented with an "accounting tag" which is associated > + * with a UID. > + * User space can set the acct_tag portion of the tag which is then used > + * with sockets: all data belonging to that socket will be counted against the > + * tag. The policing is then based on the tag's uid_tag portion, > + * and stats are collected for the acct_tag portion separately. > + * > + * There could be > + * a: {acct_tag=1, uid_tag=10003} > + * b: {acct_tag=2, uid_tag=10003} > + * c: {acct_tag=3, uid_tag=10003} > + * d: {acct_tag=0, uid_tag=10003} > + * a, b, and c represent tags associated with specific sockets. > + * d is for the totals for that uid, including all untagged traffic. > + * Typically d is used with policing/quota rules. > + * > + * We want tag_t big enough to distinguish uid_t and acct_tag. > + * It might become a struct if needed. > + * Nothing should be using it as an int. > + */ > +typedef uint64_t tag_t; /* Only used via accessors */ > + > +#define TAG_UID_MASK 0xFFFFFFFFULL > +#define TAG_ACCT_MASK (~0xFFFFFFFFULL) > + > +static inline int tag_compare(tag_t t1, tag_t t2) > +{ > + return t1 < t2 ? -1 : t1 == t2 ? 0 : 1; > +} > + > +static inline tag_t combine_atag_with_uid(tag_t acct_tag, uid_t uid) > +{ > + return acct_tag | uid; > +} > +static inline tag_t make_tag_from_uid(uid_t uid) > +{ > + return uid; > +} > +static inline uid_t get_uid_from_tag(tag_t tag) > +{ > + return tag & TAG_UID_MASK; > +} > +static inline tag_t get_utag_from_tag(tag_t tag) > +{ > + return tag & TAG_UID_MASK; > +} > +static inline tag_t get_atag_from_tag(tag_t tag) > +{ > + return tag & TAG_ACCT_MASK; > +} > + > +static inline bool valid_atag(tag_t tag) > +{ > + return !(tag & TAG_UID_MASK); > +} > +static inline tag_t make_atag_from_value(uint32_t value) > +{ > + return (uint64_t)value << 32; > +} > +/*---------------------------------------------------------------------------*/ > + > +/* > + * Maximum number of socket tags that a UID is allowed to have active. > + * Multiple processes belonging to the same UID contribute towards this limit. > + * Special UIDs that can impersonate a UID also contribute (e.g. download > + * manager, ...) > + */ > +#define DEFAULT_MAX_SOCK_TAGS 1024 > + > +/* > + * For now we only track 2 sets of counters. > + * The default set is 0. > + * Userspace can activate another set for a given uid being tracked. 
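The tag packing is easier to follow with a value plugged into the accessors above; this is a user-space illustration only, with arbitrary numbers:

        #include <assert.h>
        #include <stdint.h>

        int main(void)
        {
                uint64_t atag = (uint64_t)1 << 32;  /* make_atag_from_value(1) */
                uint64_t tag  = atag | 10003;       /* combine_atag_with_uid(atag, 10003) */

                assert(tag == 0x100002713ULL);
                assert((tag & 0xFFFFFFFFULL) == 10003);   /* get_uid_from_tag() */
                assert((tag & ~0xFFFFFFFFULL) == atag);   /* get_atag_from_tag() */
                return 0;
        }

So in the a/b/c/d example above, d is just tag 0x2713, while a and b come out as 0x100002713 and 0x200002713.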
> + */ > +#define IFS_MAX_COUNTER_SETS 2 > + > +enum ifs_tx_rx { > + IFS_TX, > + IFS_RX, > + IFS_MAX_DIRECTIONS > +}; > + > +/* For now, TCP, UDP, the rest */ > +enum ifs_proto { > + IFS_TCP, > + IFS_UDP, > + IFS_PROTO_OTHER, > + IFS_MAX_PROTOS > +}; > + > +struct byte_packet_counters { > + uint64_t bytes; > + uint64_t packets; > +}; > + > +struct data_counters { > + struct byte_packet_counters bpc[IFS_MAX_COUNTER_SETS][IFS_MAX_DIRECTIONS][IFS_MAX_PROTOS]; > +}; > + > +/* Generic X based nodes used as a base for rb_tree ops */ > +struct tag_node { > + struct rb_node node; > + tag_t tag; > +}; > + > +struct tag_stat { > + struct tag_node tn; > + struct data_counters counters; > + /* > + * If this tag is acct_tag based, we need to count against the > + * matching parent uid_tag. > + */ > + struct data_counters *parent_counters; > +}; > + > +struct iface_stat { > + struct list_head list; /* in iface_stat_list */ > + char *ifname; > + bool active; > + /* net_dev is only valid for active iface_stat */ > + struct net_device *net_dev; > + > + struct byte_packet_counters totals[IFS_MAX_DIRECTIONS]; > + /* > + * We keep the last_known, because some devices reset their counters > + * just before NETDEV_UP, while some will reset just before > + * NETDEV_REGISTER (which is more normal). > + * So now, if the device didn't do a NETDEV_UNREGISTER and we see > + * its current dev stats smaller that what was previously known, we > + * assume an UNREGISTER and just use the last_known. > + */ > + struct byte_packet_counters last_known[IFS_MAX_DIRECTIONS]; > + /* last_known is usable when last_known_valid is true */ > + bool last_known_valid; > + > + struct proc_dir_entry *proc_ptr; > + > + struct rb_root tag_stat_tree; > + spinlock_t tag_stat_list_lock; > +}; > + > +/* This is needed to create proc_dir_entries from atomic context. */ > +struct iface_stat_work { > + struct work_struct iface_work; > + struct iface_stat *iface_entry; > +}; > + > +/* > + * Track tag that this socket is transferring data for, and not necessarily > + * the uid that owns the socket. > + * This is the tag against which tag_stat.counters will be billed. > + * These structs need to be looked up by sock and pid. > + */ > +struct sock_tag { > + struct rb_node sock_node; > + struct sock *sk; /* Only used as a number, never dereferenced */ > + /* The socket is needed for sockfd_put() */ > + struct socket *socket; > + /* Used to associate with a given pid */ > + struct list_head list; /* in proc_qtu_data.sock_tag_list */ > + pid_t pid; > + > + tag_t tag; > +}; > + > +struct qtaguid_event_counts { > + /* Various successful events */ > + atomic64_t sockets_tagged; > + atomic64_t sockets_untagged; > + atomic64_t counter_set_changes; > + atomic64_t delete_cmds; > + atomic64_t iface_events; /* Number of NETDEV_* events handled */ > + > + atomic64_t match_calls; /* Number of times iptables called mt */ > + /* > + * match_found_sk_*: numbers related to the netfilter matching > + * function finding a sock for the sk_buff. > + * Total skbs processed is sum(match_found*). > + */ > + atomic64_t match_found_sk; /* An sk was already in the sk_buff. */ > + /* The connection tracker had or didn't have the sk. */ > + atomic64_t match_found_sk_in_ct; > + atomic64_t match_found_no_sk_in_ct; > + /* > + * No sk could be found. No apparent owner. Could happen with > + * unsolicited traffic. > + */ > + atomic64_t match_no_sk; > + /* > + * The file ptr in the sk_socket wasn't there. > + * This might happen for traffic while the socket is being closed. 
> + */ > + atomic64_t match_no_sk_file; > +}; > + > +/* Track the set active_set for the given tag. */ > +struct tag_counter_set { > + struct tag_node tn; > + int active_set; > +}; > + > +/*----------------------------------------------*/ > +/* > + * The qtu uid data is used to track resources that are created directly or > + * indirectly by processes (uid tracked). > + * It is shared by the processes with the same uid. > + * Some of the resource will be counted to prevent further rogue allocations, > + * some will need freeing once the owner process (uid) exits. > + */ > +struct uid_tag_data { > + struct rb_node node; > + uid_t uid; > + > + /* > + * For the uid, how many accounting tags have been set. > + */ > + int num_active_tags; > + /* Track the number of proc_qtu_data that reference it */ > + int num_pqd; > + struct rb_root tag_ref_tree; > + /* No tag_node_tree_lock; use uid_tag_data_tree_lock */ > +}; > + > +struct tag_ref { > + struct tag_node tn; > + > + /* > + * This tracks the number of active sockets that have a tag on them > + * which matches this tag_ref.tn.tag. > + * A tag ref can live on after the sockets are untagged. > + * A tag ref can only be removed during a tag delete command. > + */ > + int num_sock_tags; > +}; > + > +struct proc_qtu_data { > + struct rb_node node; > + pid_t pid; > + > + struct uid_tag_data *parent_tag_data; > + > + /* Tracks the sock_tags that need freeing upon this proc's death */ > + struct list_head sock_tag_list; > + /* No spinlock_t sock_tag_list_lock; use the global one. */ > +}; > + > +/*----------------------------------------------*/ > +#endif /* ifndef __XT_QTAGUID_INTERNAL_H__ */ > diff --git a/net/netfilter/xt_qtaguid_print.c b/net/netfilter/xt_qtaguid_print.c > new file mode 100644 > index 0000000..3917678 > --- /dev/null > +++ b/net/netfilter/xt_qtaguid_print.c > @@ -0,0 +1,556 @@ > +/* > + * Pretty printing Support for iptables xt_qtaguid module. > + * > + * (C) 2011 Google, Inc > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License version 2 as > + * published by the Free Software Foundation. > + */ > + > +/* > + * Most of the functions in this file just waste time if DEBUG is not defined. > + * The matching xt_qtaguid_print.h will static inline empty funcs if the needed > + * debug flags ore not defined. > + * Those funcs that fail to allocate memory will panic as there is no need to > + * hobble allong just pretending to do the requested work. 
> + */ > + > +#define DEBUG > + > +#include <linux/fs.h> > +#include <linux/gfp.h> > +#include <linux/net.h> > +#include <linux/rbtree.h> > +#include <linux/slab.h> > +#include <linux/spinlock_types.h> > + > + > +#include "xt_qtaguid_internal.h" > +#include "xt_qtaguid_print.h" > + > +#ifdef DDEBUG > + > +static void _bug_on_err_or_null(void *ptr) > +{ > + if (IS_ERR_OR_NULL(ptr)) { > + pr_err("qtaguid: kmalloc failed\n"); > + BUG(); > + } > +} > + > +char *pp_tag_t(tag_t *tag) > +{ > + char *res; > + > + if (!tag) > + res = kasprintf(GFP_ATOMIC, "tag_t@...l{}"); > + else > + res = kasprintf(GFP_ATOMIC, > + "tag_t@...tag=0x%llx, uid=%u}", > + tag, *tag, get_uid_from_tag(*tag)); > + _bug_on_err_or_null(res); > + return res; > +} > + > +char *pp_data_counters(struct data_counters *dc, bool showValues) > +{ > + char *res; > + > + if (!dc) > + res = kasprintf(GFP_ATOMIC, "data_counters@...l{}"); > + else if (showValues) > + res = kasprintf( > + GFP_ATOMIC, "data_counters@%p{" > + "set0{" > + "rx{" > + "tcp{b=%llu, p=%llu}, " > + "udp{b=%llu, p=%llu}," > + "other{b=%llu, p=%llu}}, " > + "tx{" > + "tcp{b=%llu, p=%llu}, " > + "udp{b=%llu, p=%llu}," > + "other{b=%llu, p=%llu}}}, " > + "set1{" > + "rx{" > + "tcp{b=%llu, p=%llu}, " > + "udp{b=%llu, p=%llu}," > + "other{b=%llu, p=%llu}}, " > + "tx{" > + "tcp{b=%llu, p=%llu}, " > + "udp{b=%llu, p=%llu}," > + "other{b=%llu, p=%llu}}}}", > + dc, > + dc->bpc[0][IFS_RX][IFS_TCP].bytes, > + dc->bpc[0][IFS_RX][IFS_TCP].packets, > + dc->bpc[0][IFS_RX][IFS_UDP].bytes, > + dc->bpc[0][IFS_RX][IFS_UDP].packets, > + dc->bpc[0][IFS_RX][IFS_PROTO_OTHER].bytes, > + dc->bpc[0][IFS_RX][IFS_PROTO_OTHER].packets, > + dc->bpc[0][IFS_TX][IFS_TCP].bytes, > + dc->bpc[0][IFS_TX][IFS_TCP].packets, > + dc->bpc[0][IFS_TX][IFS_UDP].bytes, > + dc->bpc[0][IFS_TX][IFS_UDP].packets, > + dc->bpc[0][IFS_TX][IFS_PROTO_OTHER].bytes, > + dc->bpc[0][IFS_TX][IFS_PROTO_OTHER].packets, > + dc->bpc[1][IFS_RX][IFS_TCP].bytes, > + dc->bpc[1][IFS_RX][IFS_TCP].packets, > + dc->bpc[1][IFS_RX][IFS_UDP].bytes, > + dc->bpc[1][IFS_RX][IFS_UDP].packets, > + dc->bpc[1][IFS_RX][IFS_PROTO_OTHER].bytes, > + dc->bpc[1][IFS_RX][IFS_PROTO_OTHER].packets, > + dc->bpc[1][IFS_TX][IFS_TCP].bytes, > + dc->bpc[1][IFS_TX][IFS_TCP].packets, > + dc->bpc[1][IFS_TX][IFS_UDP].bytes, > + dc->bpc[1][IFS_TX][IFS_UDP].packets, > + dc->bpc[1][IFS_TX][IFS_PROTO_OTHER].bytes, > + dc->bpc[1][IFS_TX][IFS_PROTO_OTHER].packets); > + else > + res = kasprintf(GFP_ATOMIC, "data_counters@......}", dc); > + _bug_on_err_or_null(res); > + return res; > +} > + > +char *pp_tag_node(struct tag_node *tn) > +{ > + char *tag_str; > + char *res; > + > + if (!tn) { > + res = kasprintf(GFP_ATOMIC, "tag_node@...l{}"); > + _bug_on_err_or_null(res); > + return res; > + } > + tag_str = pp_tag_t(&tn->tag); > + res = kasprintf(GFP_ATOMIC, > + "tag_node@...tag=%s}", > + tn, tag_str); > + _bug_on_err_or_null(res); > + kfree(tag_str); > + return res; > +} > + > +char *pp_tag_ref(struct tag_ref *tr) > +{ > + char *tn_str; > + char *res; > + > + if (!tr) { > + res = kasprintf(GFP_ATOMIC, "tag_ref@...l{}"); > + _bug_on_err_or_null(res); > + return res; > + } > + tn_str = pp_tag_node(&tr->tn); > + res = kasprintf(GFP_ATOMIC, > + "tag_ref@...%s, num_sock_tags=%d}", > + tr, tn_str, tr->num_sock_tags); > + _bug_on_err_or_null(res); > + kfree(tn_str); > + return res; > +} > + > +char *pp_tag_stat(struct tag_stat *ts) > +{ > + char *tn_str; > + char *counters_str; > + char *parent_counters_str; > + char *res; > + > + if (!ts) { > + res = kasprintf(GFP_ATOMIC, 
"tag_stat@...l{}"); > + _bug_on_err_or_null(res); > + return res; > + } > + tn_str = pp_tag_node(&ts->tn); > + counters_str = pp_data_counters(&ts->counters, true); > + parent_counters_str = pp_data_counters(ts->parent_counters, false); > + res = kasprintf(GFP_ATOMIC, > + "tag_stat@...%s, counters=%s, parent_counters=%s}", > + ts, tn_str, counters_str, parent_counters_str); > + _bug_on_err_or_null(res); > + kfree(tn_str); > + kfree(counters_str); > + kfree(parent_counters_str); > + return res; > +} > + > +char *pp_iface_stat(struct iface_stat *is) > +{ > + char *res; > + if (!is) > + res = kasprintf(GFP_ATOMIC, "iface_stat@...l{}"); > + else > + res = kasprintf(GFP_ATOMIC, "iface_stat@%p{" > + "list=list_head{...}, " > + "ifname=%s, " > + "total={rx={bytes=%llu, " > + "packets=%llu}, " > + "tx={bytes=%llu, " > + "packets=%llu}}, " > + "last_known_valid=%d, " > + "last_known={rx={bytes=%llu, " > + "packets=%llu}, " > + "tx={bytes=%llu, " > + "packets=%llu}}, " > + "active=%d, " > + "net_dev=%p, " > + "proc_ptr=%p, " > + "tag_stat_tree=rb_root{...}}", > + is, > + is->ifname, > + is->totals[IFS_RX].bytes, > + is->totals[IFS_RX].packets, > + is->totals[IFS_TX].bytes, > + is->totals[IFS_TX].packets, > + is->last_known_valid, > + is->last_known[IFS_RX].bytes, > + is->last_known[IFS_RX].packets, > + is->last_known[IFS_TX].bytes, > + is->last_known[IFS_TX].packets, > + is->active, > + is->net_dev, > + is->proc_ptr); > + _bug_on_err_or_null(res); > + return res; > +} > + > +char *pp_sock_tag(struct sock_tag *st) > +{ > + char *tag_str; > + char *res; > + > + if (!st) { > + res = kasprintf(GFP_ATOMIC, "sock_tag@...l{}"); > + _bug_on_err_or_null(res); > + return res; > + } > + tag_str = pp_tag_t(&st->tag); > + res = kasprintf(GFP_ATOMIC, "sock_tag@%p{" > + "sock_node=rb_node{...}, " > + "sk=%p socket=%p (f_count=%lu), list=list_head{...}, " > + "pid=%u, tag=%s}", > + st, st->sk, st->socket, atomic_long_read( > + &st->socket->file->f_count), > + st->pid, tag_str); > + _bug_on_err_or_null(res); > + kfree(tag_str); > + return res; > +} > + > +char *pp_uid_tag_data(struct uid_tag_data *utd) > +{ > + char *res; > + > + if (!utd) > + res = kasprintf(GFP_ATOMIC, "uid_tag_data@...l{}"); > + else > + res = kasprintf(GFP_ATOMIC, "uid_tag_data@%p{" > + "uid=%u, num_active_acct_tags=%d, " > + "num_pqd=%d, " > + "tag_node_tree=rb_root{...}, " > + "proc_qtu_data_tree=rb_root{...}}", > + utd, utd->uid, > + utd->num_active_tags, utd->num_pqd); > + _bug_on_err_or_null(res); > + return res; > +} > + > +char *pp_proc_qtu_data(struct proc_qtu_data *pqd) > +{ > + char *parent_tag_data_str; > + char *res; > + > + if (!pqd) { > + res = kasprintf(GFP_ATOMIC, "proc_qtu_data@...l{}"); > + _bug_on_err_or_null(res); > + return res; > + } > + parent_tag_data_str = pp_uid_tag_data(pqd->parent_tag_data); > + res = kasprintf(GFP_ATOMIC, "proc_qtu_data@%p{" > + "node=rb_node{...}, pid=%u, " > + "parent_tag_data=%s, " > + "sock_tag_list=list_head{...}}", > + pqd, pqd->pid, parent_tag_data_str > + ); > + _bug_on_err_or_null(res); > + kfree(parent_tag_data_str); > + return res; > +} > + > +/*------------------------------------------*/ > +void prdebug_sock_tag_tree(int indent_level, > + struct rb_root *sock_tag_tree) > +{ > + struct rb_node *node; > + struct sock_tag *sock_tag_entry; > + char *str; > + > + if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) > + return; > + > + if (RB_EMPTY_ROOT(sock_tag_tree)) { > + str = "sock_tag_tree=rb_root{}"; > + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); > + return; > + } > + > + str 
= "sock_tag_tree=rb_root{"; > + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); > + indent_level++; > + for (node = rb_first(sock_tag_tree); > + node; > + node = rb_next(node)) { > + sock_tag_entry = rb_entry(node, struct sock_tag, sock_node); > + str = pp_sock_tag(sock_tag_entry); > + pr_debug("%*d: %s,\n", indent_level*2, indent_level, str); > + kfree(str); > + } > + indent_level--; > + str = "}"; > + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); > +} > + > +void prdebug_sock_tag_list(int indent_level, > + struct list_head *sock_tag_list) > +{ > + struct sock_tag *sock_tag_entry; > + char *str; > + > + if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) > + return; > + > + if (list_empty(sock_tag_list)) { > + str = "sock_tag_list=list_head{}"; > + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); > + return; > + } > + > + str = "sock_tag_list=list_head{"; > + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); > + indent_level++; > + list_for_each_entry(sock_tag_entry, sock_tag_list, list) { > + str = pp_sock_tag(sock_tag_entry); > + pr_debug("%*d: %s,\n", indent_level*2, indent_level, str); > + kfree(str); > + } > + indent_level--; > + str = "}"; > + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); > +} > + > +void prdebug_proc_qtu_data_tree(int indent_level, > + struct rb_root *proc_qtu_data_tree) > +{ > + char *str; > + struct rb_node *node; > + struct proc_qtu_data *proc_qtu_data_entry; > + > + if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) > + return; > + > + if (RB_EMPTY_ROOT(proc_qtu_data_tree)) { > + str = "proc_qtu_data_tree=rb_root{}"; > + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); > + return; > + } > + > + str = "proc_qtu_data_tree=rb_root{"; > + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); > + indent_level++; > + for (node = rb_first(proc_qtu_data_tree); > + node; > + node = rb_next(node)) { > + proc_qtu_data_entry = rb_entry(node, > + struct proc_qtu_data, > + node); > + str = pp_proc_qtu_data(proc_qtu_data_entry); > + pr_debug("%*d: %s,\n", indent_level*2, indent_level, > + str); > + kfree(str); > + indent_level++; > + prdebug_sock_tag_list(indent_level, > + &proc_qtu_data_entry->sock_tag_list); > + indent_level--; > + > + } > + indent_level--; > + str = "}"; > + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); > +} > + > +void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree) > +{ > + char *str; > + struct rb_node *node; > + struct tag_ref *tag_ref_entry; > + > + if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) > + return; > + > + if (RB_EMPTY_ROOT(tag_ref_tree)) { > + str = "tag_ref_tree{}"; > + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); > + return; > + } > + > + str = "tag_ref_tree{"; > + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); > + indent_level++; > + for (node = rb_first(tag_ref_tree); > + node; > + node = rb_next(node)) { > + tag_ref_entry = rb_entry(node, > + struct tag_ref, > + tn.node); > + str = pp_tag_ref(tag_ref_entry); > + pr_debug("%*d: %s,\n", indent_level*2, indent_level, > + str); > + kfree(str); > + } > + indent_level--; > + str = "}"; > + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); > +} > + > +void prdebug_uid_tag_data_tree(int indent_level, > + struct rb_root *uid_tag_data_tree) > +{ > + char *str; > + struct rb_node *node; > + struct uid_tag_data *uid_tag_data_entry; > + > + if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) > + return; > + > + if (RB_EMPTY_ROOT(uid_tag_data_tree)) { > + str = 
"uid_tag_data_tree=rb_root{}"; > + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); > + return; > + } > + > + str = "uid_tag_data_tree=rb_root{"; > + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); > + indent_level++; > + for (node = rb_first(uid_tag_data_tree); > + node; > + node = rb_next(node)) { > + uid_tag_data_entry = rb_entry(node, struct uid_tag_data, > + node); > + str = pp_uid_tag_data(uid_tag_data_entry); > + pr_debug("%*d: %s,\n", indent_level*2, indent_level, str); > + kfree(str); > + if (!RB_EMPTY_ROOT(&uid_tag_data_entry->tag_ref_tree)) { > + indent_level++; > + prdebug_tag_ref_tree(indent_level, > + &uid_tag_data_entry->tag_ref_tree); > + indent_level--; > + } > + } > + indent_level--; > + str = "}"; > + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); > +} > + > +void prdebug_tag_stat_tree(int indent_level, > + struct rb_root *tag_stat_tree) > +{ > + char *str; > + struct rb_node *node; > + struct tag_stat *ts_entry; > + > + if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) > + return; > + > + if (RB_EMPTY_ROOT(tag_stat_tree)) { > + str = "tag_stat_tree{}"; > + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); > + return; > + } > + > + str = "tag_stat_tree{"; > + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); > + indent_level++; > + for (node = rb_first(tag_stat_tree); > + node; > + node = rb_next(node)) { > + ts_entry = rb_entry(node, struct tag_stat, tn.node); > + str = pp_tag_stat(ts_entry); > + pr_debug("%*d: %s\n", indent_level*2, indent_level, > + str); > + kfree(str); > + } > + indent_level--; > + str = "}"; > + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); > +} > + > +void prdebug_iface_stat_list(int indent_level, > + struct list_head *iface_stat_list) > +{ > + char *str; > + struct iface_stat *iface_entry; > + > + if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) > + return; > + > + if (list_empty(iface_stat_list)) { > + str = "iface_stat_list=list_head{}"; > + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); > + return; > + } > + > + str = "iface_stat_list=list_head{"; > + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); > + indent_level++; > + list_for_each_entry(iface_entry, iface_stat_list, list) { > + str = pp_iface_stat(iface_entry); > + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); > + kfree(str); > + > + spin_lock_bh(&iface_entry->tag_stat_list_lock); > + if (!RB_EMPTY_ROOT(&iface_entry->tag_stat_tree)) { > + indent_level++; > + prdebug_tag_stat_tree(indent_level, > + &iface_entry->tag_stat_tree); > + indent_level--; > + } > + spin_unlock_bh(&iface_entry->tag_stat_list_lock); > + } > + indent_level--; > + str = "}"; > + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); > +} > + > +#endif /* ifdef DDEBUG */ > +/*------------------------------------------*/ > +static const char * const netdev_event_strings[] = { > + "netdev_unknown", > + "NETDEV_UP", > + "NETDEV_DOWN", > + "NETDEV_REBOOT", > + "NETDEV_CHANGE", > + "NETDEV_REGISTER", > + "NETDEV_UNREGISTER", > + "NETDEV_CHANGEMTU", > + "NETDEV_CHANGEADDR", > + "NETDEV_GOING_DOWN", > + "NETDEV_CHANGENAME", > + "NETDEV_FEAT_CHANGE", > + "NETDEV_BONDING_FAILOVER", > + "NETDEV_PRE_UP", > + "NETDEV_PRE_TYPE_CHANGE", > + "NETDEV_POST_TYPE_CHANGE", > + "NETDEV_POST_INIT", > + "NETDEV_UNREGISTER_BATCH", > + "NETDEV_RELEASE", > + "NETDEV_NOTIFY_PEERS", > + "NETDEV_JOIN", > +}; > + > +const char *netdev_evt_str(int netdev_event) > +{ > + if (netdev_event < 0 > + || netdev_event >= ARRAY_SIZE(netdev_event_strings)) > + return 
"bad event num"; > + return netdev_event_strings[netdev_event]; > +} > diff --git a/net/netfilter/xt_qtaguid_print.h b/net/netfilter/xt_qtaguid_print.h > new file mode 100644 > index 0000000..b63871a > --- /dev/null > +++ b/net/netfilter/xt_qtaguid_print.h > @@ -0,0 +1,120 @@ > +/* > + * Pretty printing Support for iptables xt_qtaguid module. > + * > + * (C) 2011 Google, Inc > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License version 2 as > + * published by the Free Software Foundation. > + */ > +#ifndef __XT_QTAGUID_PRINT_H__ > +#define __XT_QTAGUID_PRINT_H__ > + > +#include "xt_qtaguid_internal.h" > + > +#ifdef DDEBUG > + > +char *pp_tag_t(tag_t *tag); > +char *pp_data_counters(struct data_counters *dc, bool showValues); > +char *pp_tag_node(struct tag_node *tn); > +char *pp_tag_ref(struct tag_ref *tr); > +char *pp_tag_stat(struct tag_stat *ts); > +char *pp_iface_stat(struct iface_stat *is); > +char *pp_sock_tag(struct sock_tag *st); > +char *pp_uid_tag_data(struct uid_tag_data *qtd); > +char *pp_proc_qtu_data(struct proc_qtu_data *pqd); > + > +/*------------------------------------------*/ > +void prdebug_sock_tag_list(int indent_level, > + struct list_head *sock_tag_list); > +void prdebug_sock_tag_tree(int indent_level, > + struct rb_root *sock_tag_tree); > +void prdebug_proc_qtu_data_tree(int indent_level, > + struct rb_root *proc_qtu_data_tree); > +void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree); > +void prdebug_uid_tag_data_tree(int indent_level, > + struct rb_root *uid_tag_data_tree); > +void prdebug_tag_stat_tree(int indent_level, > + struct rb_root *tag_stat_tree); > +void prdebug_iface_stat_list(int indent_level, > + struct list_head *iface_stat_list); > + > +#else > + > +/*------------------------------------------*/ > +static inline char *pp_tag_t(tag_t *tag) > +{ > + return NULL; > +} > +static inline char *pp_data_counters(struct data_counters *dc, bool showValues) > +{ > + return NULL; > +} > +static inline char *pp_tag_node(struct tag_node *tn) > +{ > + return NULL; > +} > +static inline char *pp_tag_ref(struct tag_ref *tr) > +{ > + return NULL; > +} > +static inline char *pp_tag_stat(struct tag_stat *ts) > +{ > + return NULL; > +} > +static inline char *pp_iface_stat(struct iface_stat *is) > +{ > + return NULL; > +} > +static inline char *pp_sock_tag(struct sock_tag *st) > +{ > + return NULL; > +} > +static inline char *pp_uid_tag_data(struct uid_tag_data *qtd) > +{ > + return NULL; > +} > +static inline char *pp_proc_qtu_data(struct proc_qtu_data *pqd) > +{ > + return NULL; > +} > + > +/*------------------------------------------*/ > +static inline > +void prdebug_sock_tag_list(int indent_level, > + struct list_head *sock_tag_list) > +{ > +} > +static inline > +void prdebug_sock_tag_tree(int indent_level, > + struct rb_root *sock_tag_tree) > +{ > +} > +static inline > +void prdebug_proc_qtu_data_tree(int indent_level, > + struct rb_root *proc_qtu_data_tree) > +{ > +} > +static inline > +void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree) > +{ > +} > +static inline > +void prdebug_uid_tag_data_tree(int indent_level, > + struct rb_root *uid_tag_data_tree) > +{ > +} > +static inline > +void prdebug_tag_stat_tree(int indent_level, > + struct rb_root *tag_stat_tree) > +{ > +} > +static inline > +void prdebug_iface_stat_list(int indent_level, > + struct list_head *iface_stat_list) > +{ > +} > +#endif > 
+/*------------------------------------------*/ > +const char *netdev_evt_str(int netdev_event); > +#endif /* ifndef __XT_QTAGUID_PRINT_H__ */ > -- > 1.7.9.5 -- Thanks, //richard -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@...r.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html