[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20171031124145.9667-11-bjorn.topel@gmail.com>
Date: Tue, 31 Oct 2017 13:41:41 +0100
From: Björn Töpel <bjorn.topel@...il.com>
To: bjorn.topel@...il.com, magnus.karlsson@...el.com,
alexander.h.duyck@...el.com, alexander.duyck@...il.com,
john.fastabend@...il.com, ast@...com, brouer@...hat.com,
michael.lundkvist@...csson.com, ravineet.singh@...csson.com,
daniel@...earbox.net, netdev@...r.kernel.org
Cc: Björn Töpel <bjorn.topel@...el.com>,
jesse.brandeburg@...el.com, anjali.singhai@...el.com,
rami.rosen@...el.com, jeffrey.b.shaw@...el.com,
ferruh.yigit@...el.com, qi.z.zhang@...el.com
Subject: [RFC PATCH 10/14] samples/tpacket4: added tpbench
From: Björn Töpel <bjorn.topel@...el.com>
The tpbench program benchmarks TPACKET_V2 through TPACKET_V4. The
accompanying bench_all.sh script makes it easier to test all
versions.
Note that zero-copy means binding the TPACKET_V4 socket to a certain
NIC hardware queue, so you'll need to steer your traffic to a certain
NIC hardware queue. Say that you'd like your UDP traffic from port
4242 to end up in queue 16. Here, we use ethtool for this:
ethtool -N p3p2 rx-flow-hash udp4 fn
ethtool -N p3p2 flow-type udp4 src-port 4242 dst-port 4242 \
action 16
Running the benchmark in zero-copy mode can then be done using:
taskset -c 16 ./tpbench -i p3p2 --rxdrop --zerocopy 17
Note that the queue number passed to --zerocopy is one-based and not
zero-based, so hardware queue 16 is given as 17.
Signed-off-by: Björn Töpel <bjorn.topel@...el.com>
---
samples/tpacket4/Makefile | 12 +
samples/tpacket4/bench_all.sh | 28 +
samples/tpacket4/tpbench.c | 1253 +++++++++++++++++++++++++++++++++++++++++
3 files changed, 1293 insertions(+)
create mode 100644 samples/tpacket4/Makefile
create mode 100755 samples/tpacket4/bench_all.sh
create mode 100644 samples/tpacket4/tpbench.c
diff --git a/samples/tpacket4/Makefile b/samples/tpacket4/Makefile
new file mode 100644
index 000000000000..1dd731ffe3e9
--- /dev/null
+++ b/samples/tpacket4/Makefile
@@ -0,0 +1,12 @@
+# kbuild trick to avoid linker error. Can be omitted if a module is built.
+obj- := dummy.o
+
+# List of programs to build
+hostprogs-y := tpbench
+
+# Tell kbuild to always build the programs
+always := $(hostprogs-y)
+
+# Add $(objtree)/usr/include to the include path when compiling tpbench.o
+# (presumably so that the headers exported by this kernel tree are picked
+# up instead of the system ones -- confirm against the build setup).
+HOSTCFLAGS_tpbench.o += -I$(objtree)/usr/include
+
+# Convenience target naming the single program built here.
+all: tpbench
diff --git a/samples/tpacket4/bench_all.sh b/samples/tpacket4/bench_all.sh
new file mode 100755
index 000000000000..8d7ee17e1682
--- /dev/null
+++ b/samples/tpacket4/bench_all.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+DIR=`dirname "${BASH_SOURCE[0]}"`
+
+IF=p3p2
+DURATION=60
+CORE=14
+ZC=17
+
+echo "You might want to change the parameters in ${BASH_SOURCE[0]}"
+echo "${IF} cpu${CORE} duration ${DURATION}s zc ${ZC}"
+
+sudo taskset -c ${CORE} timeout -s int ${DURATION} ${DIR}/tpbench -i ${IF} --version=2 --rxdrop
+sudo taskset -c ${CORE} timeout -s int ${DURATION} ${DIR}/tpbench -i ${IF} --version=3 --rxdrop
+sudo taskset -c ${CORE} timeout -s int ${DURATION} ${DIR}/tpbench -i ${IF} --version=4 --rxdrop
+sudo taskset -c ${CORE} timeout -s int ${DURATION} ${DIR}/tpbench -i ${IF} --version=4 --rxdrop --zerocopy ${ZC}
+
+sudo taskset -c ${CORE} timeout -s int ${DURATION} ${DIR}/tpbench -i ${IF} --version=2 --txonly
+sudo taskset -c ${CORE} timeout -s int ${DURATION} ${DIR}/tpbench -i ${IF} --version=3 --txonly
+sudo taskset -c ${CORE} timeout -s int ${DURATION} ${DIR}/tpbench -i ${IF} --version=4 --txonly
+sudo taskset -c ${CORE} timeout -s int ${DURATION} ${DIR}/tpbench -i ${IF} --version=4 --txonly --zerocopy ${ZC}
+
+sudo taskset -c ${CORE} timeout -s int ${DURATION} ${DIR}/tpbench -i ${IF} --version=2 --l2fwd
+sudo taskset -c ${CORE} timeout -s int ${DURATION} ${DIR}/tpbench -i ${IF} --version=3 --l2fwd
+sudo taskset -c ${CORE} timeout -s int ${DURATION} ${DIR}/tpbench -i ${IF} --version=4 --l2fwd
+sudo taskset -c ${CORE} timeout -s int ${DURATION} ${DIR}/tpbench -i ${IF} --version=4 --l2fwd --zerocopy ${ZC}
+
+
diff --git a/samples/tpacket4/tpbench.c b/samples/tpacket4/tpbench.c
new file mode 100644
index 000000000000..46fb83009e06
--- /dev/null
+++ b/samples/tpacket4/tpbench.c
@@ -0,0 +1,1253 @@
+/*
+ * tpbench
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <getopt.h>
+#include <limits.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/ether.h>
+#include <netinet/ip.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/shm.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+/* Upper bound on the number of packets handled per rx/tx iteration. */
+#define BATCH_SIZE 64 /* process pace */
+
+/* Frame count and per-frame size used for the V2/V3 rings and the V4 umem. */
+#define NUM_BUFFERS 131072
+#define FRAME_SIZE 2048
+
+#define BLOCK_SIZE (1 << 22) /* V2/V3 */
+#define NUM_DESCS 4096 /* V4 */
+
+/* Packet counters and start timestamp; reported by sigdie() on SIGINT. */
+static unsigned long rx_npkts;
+static unsigned long tx_npkts;
+static unsigned long start_time;
+
+/* cli options */
+/* The values double as the first index into benchmarks[][]. */
+enum tpacket_version {
+ PV2 = 0,
+ PV3 = 1,
+ PV4 = 2,
+};
+
+/* The values double as the second index into benchmarks[][]. */
+enum benchmark_type {
+ BENCH_RXDROP = 0,
+ BENCH_TXONLY = 1,
+ BENCH_L2FWD = 2,
+};
+
+/* Defaults: TPACKET_V4, rxdrop, no interface, zero-copy disabled (0). */
+static enum tpacket_version opt_tpver = PV4;
+static enum benchmark_type opt_bench = BENCH_RXDROP;
+static const char *opt_if = "";
+static int opt_zerocopy;
+
+/* One frame-based ring (V2 rx/tx; also reused as the V3 tx ring). */
+struct tpacket2_queue {
+ void *ring; /* start of this ring inside the mmap()ed area */
+
+ unsigned int last_used_idx; /* free-running index of the next frame */
+ unsigned int ring_size; /* frames; masked as (ring_size - 1) */
+ unsigned int frame_size_log2; /* shift turning a slot into an offset */
+};
+
+/* TPACKET_V2 socket together with its rx and tx rings. */
+struct tp2_queue_pair {
+ struct tpacket2_queue rx;
+ struct tpacket2_queue tx;
+ int sfd; /* PF_PACKET socket descriptor */
+ const char *interface_name;
+};
+
+/* Block-based TPACKET_V3 rx ring plus the batch currently handed out. */
+struct tpacket3_rx_queue {
+ void *ring; /* start of the rx ring inside the mmap()ed area */
+ struct tpacket3_hdr *frames[BATCH_SIZE]; /* frames of the current batch */
+
+ unsigned int last_used_idx; /* free-running index of the current block */
+ unsigned int ring_size; /* NB! blocks, not frames */
+ unsigned int block_size_log2; /* shift turning a block slot into an offset */
+
+ /* last frame handed out from the current block; NULL once it is drained */
+ struct tpacket3_hdr *last_frame;
+ unsigned int npkts; /* >0 in block */
+};
+
+/* TPACKET_V3 socket; rx is block based, tx uses the frame-based layout. */
+struct tp3_queue_pair {
+ struct tpacket3_rx_queue rx;
+ struct tpacket2_queue tx;
+ int sfd; /* PF_PACKET socket descriptor */
+ const char *interface_name;
+};
+
+/* Packet buffer area registered with the kernel through PACKET_MEMREG. */
+struct tp4_umem {
+ char *buffer; /* page-aligned start of the buffer area */
+ size_t size; /* total size in bytes */
+ unsigned int frame_size;
+ unsigned int frame_size_log2; /* shift turning a frame index into an offset */
+ unsigned int nframes;
+ int mr_fd; /* socket on which PACKET_MEMREG was issued */
+ unsigned long free_stack[NUM_BUFFERS]; /* frame indices for the txonly path */
+ unsigned int free_stack_idx; /* next entry pop_free_stack() returns */
+};
+
+/*
+ * TPACKET_V4 socket with its descriptor rings and buffer area.
+ * struct tpacket4_queue is not defined in this file (presumably it comes
+ * from the patched <linux/if_packet.h> -- confirm).
+ */
+struct tp4_queue_pair {
+ struct tpacket4_queue rx;
+ struct tpacket4_queue tx;
+ int sfd; /* PF_PACKET socket descriptor */
+ const char *interface_name;
+ struct tp4_umem *umem;
+};
+
+/*
+ * Hook table describing one (tpacket version, benchmark type) combination.
+ * queue_pair is the opaque pointer returned by configure.  Batches are
+ * passed around as the index range [start, end).  Hooks a combination does
+ * not need are left NULL in benchmarks[][].
+ */
+struct benchmark {
+ /* create and set up the socket/rings; returns the queue pair */
+ void * (*configure)(const char *interface_name);
+ /* fetch the next batch; *start == *end means nothing is available */
+ void (*rx)(void *queue_pair, unsigned int *start,
+ unsigned int *end);
+ /* packet data and length for batch index idx (copying paths) */
+ void * (*get_data)(void *queue_pair, unsigned int idx,
+ unsigned int *len);
+ /* V4: buffer index, length and offset for batch index idx */
+ unsigned long (*get_data_desc)(void *queue_pair, unsigned int idx,
+ unsigned int *len,
+ unsigned short *offset);
+ /* V4: hand buffer index didx back for batch index idx */
+ void (*set_data_desc)(void *queue_pair, unsigned int idx,
+ unsigned long didx);
+ /* optional per-batch packet processing; only called when non-NULL */
+ void (*process)(void *queue_pair, unsigned int start,
+ unsigned int end);
+ /* return the rx entries [start, end) to the kernel */
+ void (*rx_release)(void *queue_pair, unsigned int start,
+ unsigned int end);
+ /* final step of every iteration of run_benchmark() */
+ void (*tx)(void *queue_pair, unsigned int start,
+ unsigned int end);
+};
+
+/* Template frame sent by the V2/V3 txonly path, and its length in bytes. */
+static char tx_frame[1024];
+static unsigned int tx_frame_len;
+/* Hook table of the benchmark selected on the command line. */
+static struct benchmark benchmark;
+
+/*
+ * Always-enabled assertion: on failure print the location, the failed
+ * expression and the current errno, then exit with EXIT_FAILURE.
+ */
+#define lassert(expr) \
+ do { \
+ if (!(expr)) { \
+ fprintf(stderr, "%s:%s:%i: Assertion failed: " \
+ #expr ": errno: %d/\"%s\"\n", \
+ __FILE__, __func__, __LINE__, \
+ errno, strerror(errno)); \
+ exit(EXIT_FAILURE); \
+ } \
+ } while (0)
+
+/*
+ * Compiler-only barrier; u_smp_rmb()/u_smp_wmb() expand to it and emit no
+ * CPU fence instruction.
+ * NOTE(review): that is presumably only sufficient on strongly ordered
+ * CPUs such as x86 -- confirm before running on other architectures.
+ */
+#define barrier() __asm__ __volatile__("" : : : "memory")
+#define u_smp_rmb() barrier()
+#define u_smp_wmb() barrier()
+#define likely(x) __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+/* Index of the highest set bit of x, i.e. floor(log2(x)); x must be != 0. */
+#define log2(x) \
+ ((unsigned int)(8 * sizeof(unsigned long long) - \
+ __builtin_clzll((x)) - 1))
+
+#if 0
+/*
+ * Debug helper (compiled out): print length bytes starting at pkt as hex,
+ * 32 bytes per line, each line starting with prefix and followed by the
+ * same bytes as characters.  A short last line is padded with "__ ".
+ */
+static void hex_dump(void *pkt, size_t length, const char *prefix)
+{
+ int i = 0;
+ const unsigned char *address = (unsigned char *)pkt;
+ const unsigned char *line = address;
+ size_t line_size = 32;
+ unsigned char c;
+
+ printf("%s | ", prefix);
+ while (length-- > 0) {
+ printf("%02X ", *address++);
+ /* end of a full line, or end of the data in a partial line */
+ if (!(++i % line_size) || (length == 0 && i % line_size)) {
+ if (length == 0) {
+ while (i++ % line_size)
+ printf("__ ");
+ }
+ printf(" | "); /* right close */
+ while (line < address) {
+ c = *line++;
+ /* bytes below 33 and 255 are shown as '.' (0x2E) */
+ printf("%c", (c < 33 || c == 255) ? 0x2E : c);
+ }
+ printf("\n");
+ if (length > 0)
+ printf("%s | ", prefix);
+ }
+ }
+ printf("\n");
+}
+#endif
+
+/*
+ * Copy a fixed 60-byte test frame to frame and return its length.
+ *
+ * The frame is Ethernet (dst 3c:fd:fe:9e:7f:71, src ec:b1:d7:98:3a:c0,
+ * type 0x0800) carrying IPv4 (protocol 0x11, UDP) with UDP source and
+ * destination port 0x1092 (4242).  frame must have room for 60 bytes.
+ * The data argument is ignored by the active code path.
+ */
+static size_t gen_eth_frame(char *frame, int data)
+{
+ static const char d[] =
+ "\x3c\xfd\xfe\x9e\x7f\x71\xec\xb1\xd7\x98\x3a\xc0\x08\x00\x45\x00"
+ "\x00\x2e\x00\x00\x00\x00\x40\x11\x88\x97\x05\x08\x07\x08\xc8\x14"
+ "\x1e\x04\x10\x92\x10\x92\x00\x1a\x6d\xa3\x34\x33\x1f\x69\x40\x6b"
+ "\x54\x59\xb6\x14\x2d\x11\x44\xbf\xaf\xd9\xbe\xaa";
+
+ (void)data;
+ /* sizeof(d) - 1: leave out the string literal's terminating NUL */
+ memcpy(frame, d, sizeof(d) - 1);
+ return sizeof(d) - 1;
+
+ /* Disabled alternative generator, kept for reference. */
+#if 0
+ /* XXX This generates "multicast packets" */
+ struct ether_header *eh = (struct ether_header *)frame;
+ size_t len = sizeof(struct ether_header);
+ int i;
+
+ for (i = 0; i < 6; i++) {
+ eh->ether_shost[i] = i + 0x01;
+ eh->ether_dhost[i] = i + 0x11;
+ }
+ eh->ether_type = htons(ETH_P_IP);
+
+ for (i = 0; i < 46; i++)
+ frame[len++] = data;
+
+ return len;
+#endif
+}
+
+/* Fill the global tx_frame template and record its length. */
+static void setup_tx_frame(void)
+{
+	size_t nbytes = gen_eth_frame(tx_frame, 42);
+
+	tx_frame_len = nbytes;
+}
+
+/*
+ * Swap the source and destination MAC addresses of the Ethernet header
+ * that starts at data.
+ */
+static void swap_mac_addresses(void *data)
+{
+	struct ether_header *eth = (struct ether_header *)data;
+	struct ether_addr *src_addr = (struct ether_addr *)&eth->ether_shost;
+	struct ether_addr *dst_addr = (struct ether_addr *)&eth->ether_dhost;
+	struct ether_addr tmp;
+
+	tmp = *src_addr;
+	*src_addr = *dst_addr;
+	*dst_addr = tmp;
+}
+
+/*
+ * rx hook for txonly: nothing is received, always report a full batch
+ * [0, BATCH_SIZE) so that the tx hook has work to do.
+ */
+static void rx_dummy(void *queue_pair, unsigned int *start, unsigned int *end)
+{
+	(void)queue_pair;
+
+	*end = BATCH_SIZE;
+	*start = 0;
+}
+
+/* rx_release hook for txonly: there are no rx entries to give back. */
+static void rx_release_dummy(void *queue_pair, unsigned int start,
+			     unsigned int end)
+{
+	(void)end;
+	(void)start;
+	(void)queue_pair;
+}
+
+/*
+ * get_data hook for txonly: every batch index maps to the global template
+ * frame.  Stores the template length in *len and returns tx_frame.
+ */
+static void *get_data_dummy(void *queue_pair, unsigned int idx,
+			    unsigned int *len)
+{
+	void *frame = tx_frame;
+
+	(void)idx;
+	(void)queue_pair;
+
+	*len = tx_frame_len;
+	return frame;
+}
+
+#if 0
+/* Debug process hook (compiled out): hex dump every packet of the batch. */
+static void process_hexdump(void *queue_pair, unsigned int start,
+			    unsigned int end)
+{
+	unsigned int i, nbytes;
+
+	for (i = start; i != end; i++) {
+		void *pkt = benchmark.get_data(queue_pair, i, &nbytes);
+
+		hex_dump(pkt, nbytes, "Rx:");
+	}
+}
+#endif
+
+/*
+ * process hook for l2fwd: swap source and destination MAC address of
+ * every packet in the batch [start, end).
+ */
+static void process_swap_mac(void *queue_pair, unsigned int start,
+			     unsigned int end)
+{
+	unsigned int i, nbytes;
+
+	for (i = start; i != end; i++) {
+		/* the length is reported by the hook but not needed here */
+		void *pkt = benchmark.get_data(queue_pair, i, &nbytes);
+
+		swap_mac_addresses(pkt);
+	}
+}
+
+/*
+ * Main loop: configure the selected benchmark on interface_name, then
+ * forever busy-poll the rx hook until it yields a non-empty batch
+ * [start, end), run the optional process hook on it and pass it to the tx
+ * hook.  This function never returns.
+ */
+static void run_benchmark(const char *interface_name)
+{
+	unsigned int start, end;
+	void *qp;	/* opaque: tp2, tp3 or tp4 queue pair */
+
+	qp = benchmark.configure(interface_name);
+
+	for (;;) {
+		/* XXX busy-wait; there is no poll() fallback yet */
+		do {
+			benchmark.rx(qp, &start, &end);
+		} while (end == start);
+
+		if (benchmark.process)
+			benchmark.process(qp, start, end);
+
+		benchmark.tx(qp, start, end);
+	}
+}
+
+/* Return the current CLOCK_MONOTONIC reading in nanoseconds. */
+static unsigned long get_nsecs(void)
+{
+	struct timespec now;
+	unsigned long nsecs;
+
+	clock_gettime(CLOCK_MONOTONIC, &now);
+
+	nsecs = now.tv_sec * 1000000000UL;
+	nsecs += now.tv_nsec;
+
+	return nsecs;
+}
+
+/*
+ * Create and set up a TPACKET_V2 socket on interface_name: an rx and a tx
+ * ring with identical geometry, mapped by a single mmap() (rx first, tx
+ * right behind it), qdisc bypass enabled, bound to the interface for all
+ * protocols.  Also prepares the global tx frame template.
+ *
+ * Any failing step terminates the program through lassert().  Returns a
+ * newly allocated struct tp2_queue_pair; nothing in this file frees it.
+ */
+static void *tp2_configure(const char *interface_name)
+{
+ int sfd, noqdisc, ret, ver = TPACKET_V2;
+ struct tp2_queue_pair *tqp;
+ struct tpacket_req req = {};
+ struct sockaddr_ll ll;
+ void *rxring;
+
+ /* create PF_PACKET socket */
+ sfd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
+ lassert(sfd >= 0);
+ ret = setsockopt(sfd, SOL_PACKET, PACKET_VERSION, &ver, sizeof(ver));
+ lassert(ret == 0);
+
+ tqp = calloc(1, sizeof(*tqp));
+ lassert(tqp);
+
+ tqp->sfd = sfd;
+ tqp->interface_name = interface_name;
+
+ /* ring geometry: NUM_BUFFERS frames of FRAME_SIZE, packed into blocks */
+ req.tp_block_size = BLOCK_SIZE;
+ req.tp_frame_size = FRAME_SIZE;
+ req.tp_block_nr = NUM_BUFFERS * FRAME_SIZE / BLOCK_SIZE;
+ req.tp_frame_nr = req.tp_block_nr * BLOCK_SIZE / FRAME_SIZE;
+
+ /* the same request is used for both directions */
+ ret = setsockopt(sfd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));
+ lassert(ret == 0);
+ ret = setsockopt(sfd, SOL_PACKET, PACKET_TX_RING, &req, sizeof(req));
+ lassert(ret == 0);
+
+ /* one mapping covering both rings, hence the factor 2 */
+ rxring = mmap(0, 2 * req.tp_block_size * req.tp_block_nr,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_LOCKED | MAP_POPULATE, sfd, 0);
+ lassert(rxring != MAP_FAILED);
+
+ tqp->rx.ring = rxring;
+ tqp->rx.ring_size = NUM_BUFFERS;
+ tqp->rx.frame_size_log2 = log2(req.tp_frame_size);
+
+ /* the tx ring starts where the rx ring ends */
+ tqp->tx.ring = rxring + req.tp_block_size * req.tp_block_nr;
+ tqp->tx.ring_size = NUM_BUFFERS;
+ tqp->tx.frame_size_log2 = log2(req.tp_frame_size);
+
+ /* NOTE(review): ll is not zeroed, so sll_addr is left uninitialised */
+ ll.sll_family = PF_PACKET;
+ ll.sll_protocol = htons(ETH_P_ALL);
+ ll.sll_ifindex = if_nametoindex(interface_name);
+ ll.sll_hatype = 0;
+ ll.sll_pkttype = 0;
+ ll.sll_halen = 0;
+
+ noqdisc = 1;
+ ret = setsockopt(sfd, SOL_PACKET, PACKET_QDISC_BYPASS,
+ &noqdisc, sizeof(noqdisc));
+ lassert(ret == 0);
+
+ ret = bind(sfd, (struct sockaddr *)&ll, sizeof(ll));
+ lassert(ret == 0);
+
+ setup_tx_frame();
+
+ return tqp;
+}
+
+/*
+ * rx hook for V2: starting at last_used_idx, collect consecutive frames
+ * whose status has TP_STATUS_USER set, at most BATCH_SIZE of them.  The
+ * batch is returned as the free-running index range [*start, *end);
+ * *start == *end means no frame was ready.  Adds the batch size to
+ * rx_npkts.
+ */
+static void tp2_rx(void *queue_pair, unsigned int *start, unsigned int *end)
+{
+ struct tpacket2_queue *rxq = &((struct tp2_queue_pair *)queue_pair)->rx;
+ unsigned int batch = 0;
+
+ *start = rxq->last_used_idx;
+ *end = rxq->last_used_idx;
+
+ for (;;) {
+ /* ring_size is used as a power-of-two mask */
+ unsigned int idx = *end & (rxq->ring_size - 1);
+ struct tpacket2_hdr *hdr;
+
+ hdr = (struct tpacket2_hdr *)(rxq->ring +
+ (idx << rxq->frame_size_log2));
+ /* stop at the first frame still owned by the kernel */
+ if ((hdr->tp_status & TP_STATUS_USER) != TP_STATUS_USER)
+ break;
+
+ (*end)++;
+ if (++batch == BATCH_SIZE)
+ break;
+ }
+
+ rxq->last_used_idx = *end;
+ rx_npkts += (*end - *start);
+
+ /* status before data */
+ u_smp_rmb();
+}
+
+/*
+ * rx_release hook for V2: give the frames with free-running indices
+ * [start, end) back to the kernel by setting their status to
+ * TP_STATUS_KERNEL.
+ */
+static void tp2_rx_release(void *queue_pair, unsigned int start,
+			   unsigned int end)
+{
+	struct tp2_queue_pair *qp = queue_pair;
+	struct tpacket2_queue *rxq = &qp->rx;
+	unsigned int i;
+
+	for (i = start; i != end; i++) {
+		unsigned int slot = i & (rxq->ring_size - 1);
+		struct tpacket2_hdr *hdr;
+
+		hdr = (struct tpacket2_hdr *)((char *)rxq->ring +
+					      (slot << rxq->frame_size_log2));
+		hdr->tp_status = TP_STATUS_KERNEL;
+	}
+}
+
+/*
+ * get_data hook for V2: locate the rx frame for free-running index idx,
+ * store its captured length (tp_snaplen) in *len and return a pointer to
+ * the packet data, which starts tp_mac bytes into the frame.
+ */
+static void *tp2_get_data(void *queue_pair, unsigned int idx, unsigned int *len)
+{
+	struct tp2_queue_pair *qp = queue_pair;
+	struct tpacket2_queue *rxq = &qp->rx;
+	unsigned int slot = idx & (rxq->ring_size - 1);
+	struct tpacket2_hdr *hdr;
+
+	hdr = (struct tpacket2_hdr *)((char *)rxq->ring +
+				      (slot << rxq->frame_size_log2));
+	*len = hdr->tp_snaplen;
+
+	return (char *)hdr + hdr->tp_mac;
+}
+
+/*
+ * tx hook for V2: copy the packets of batch [start, end) into consecutive
+ * tx ring frames and mark them TP_STATUS_SEND_REQUEST, stopping early at
+ * the first tx frame that is still queued or being sent.  Then kick the
+ * kernel with a zero-length non-blocking sendto(), release the whole rx
+ * batch and add the number of queued packets to tx_npkts.
+ *
+ * The complete batch is released even when not all of it could be queued.
+ */
+static void tp2_tx(void *queue_pair, unsigned int start, unsigned int end)
+{
+ struct tp2_queue_pair *qp = queue_pair;
+ struct tpacket2_queue *txq = &qp->tx;
+ unsigned int len, curr = start;
+ void *data;
+ int ret;
+
+ while (curr != end) {
+ unsigned int idx = txq->last_used_idx & (txq->ring_size - 1);
+ struct tpacket2_hdr *hdr;
+
+ hdr = (struct tpacket2_hdr *)(txq->ring +
+ (idx << txq->frame_size_log2));
+ /* tx ring full: the kernel has not finished with this frame */
+ if (hdr->tp_status &
+ (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING)) {
+ break;
+ }
+
+ data = benchmark.get_data(queue_pair, curr, &len);
+
+ hdr->tp_snaplen = len;
+ hdr->tp_len = len;
+ /* packet data is placed at this fixed offset inside the tx frame */
+ memcpy((char *)hdr + TPACKET2_HDRLEN -
+ sizeof(struct sockaddr_ll), data, len);
+
+ /* data before status */
+ u_smp_wmb();
+
+ hdr->tp_status = TP_STATUS_SEND_REQUEST;
+
+ txq->last_used_idx++;
+ curr++;
+ }
+
+ /* EAGAIN and ENOBUFS are tolerated; any other error is fatal */
+ ret = sendto(qp->sfd, NULL, 0, MSG_DONTWAIT, NULL, 0);
+ if (!(ret >= 0 || errno == EAGAIN || errno == ENOBUFS))
+ lassert(0);
+
+ benchmark.rx_release(queue_pair, start, end);
+
+ tx_npkts += (curr - start);
+}
+
+/*
+ * Create and set up a TPACKET_V3 socket on interface_name: an rx and a tx
+ * ring with identical geometry, mapped by a single mmap() (rx first, tx
+ * right behind it), qdisc bypass enabled, bound to the interface for all
+ * protocols.  Also prepares the global tx frame template.
+ *
+ * The queue bookkeeping is derived from the ring request itself, so it
+ * cannot drift from BLOCK_SIZE/FRAME_SIZE/NUM_BUFFERS.
+ *
+ * Any failing step terminates the program through lassert().  Returns a
+ * newly allocated struct tp3_queue_pair; nothing in this file frees it.
+ */
+static void *tp3_configure(const char *interface_name)
+{
+	int sfd, noqdisc, ret, ver = TPACKET_V3;
+	struct tp3_queue_pair *tqp;
+	struct tpacket_req3 req = {};
+	struct sockaddr_ll ll = {};
+	size_t ring_bytes;
+	void *rxring;
+
+	/* create PF_PACKET socket */
+	sfd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
+	lassert(sfd >= 0);
+	ret = setsockopt(sfd, SOL_PACKET, PACKET_VERSION, &ver, sizeof(ver));
+	lassert(ret == 0);
+
+	tqp = calloc(1, sizeof(*tqp));
+	lassert(tqp);
+
+	tqp->sfd = sfd;
+	tqp->interface_name = interface_name;
+
+	/* XXX is it unfair to have 2 frames per block in V3? */
+	req.tp_block_size = BLOCK_SIZE;
+	req.tp_frame_size = FRAME_SIZE;
+	req.tp_block_nr = NUM_BUFFERS * FRAME_SIZE / BLOCK_SIZE;
+	req.tp_frame_nr = req.tp_block_nr * BLOCK_SIZE / FRAME_SIZE;
+	req.tp_retire_blk_tov = 0;
+	req.tp_sizeof_priv = 0;
+	req.tp_feature_req_word = 0;
+
+	/* the same request is used for both directions */
+	ret = setsockopt(sfd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));
+	lassert(ret == 0);
+	ret = setsockopt(sfd, SOL_PACKET, PACKET_TX_RING, &req, sizeof(req));
+	lassert(ret == 0);
+
+	/* one mapping covering both rings: rx first, tx right behind it */
+	ring_bytes = (size_t)req.tp_block_size * req.tp_block_nr;
+	rxring = mmap(0, 2 * ring_bytes, PROT_READ | PROT_WRITE,
+		      MAP_SHARED | MAP_LOCKED | MAP_POPULATE, sfd, 0);
+	lassert(rxring != MAP_FAILED);
+
+	/* the rx ring is walked block by block */
+	tqp->rx.ring = rxring;
+	tqp->rx.ring_size = req.tp_block_nr;
+	tqp->rx.block_size_log2 = log2(req.tp_block_size);
+
+	/* the tx ring is walked frame by frame */
+	tqp->tx.ring = (char *)rxring + ring_bytes;
+	tqp->tx.ring_size = req.tp_frame_nr;
+	tqp->tx.frame_size_log2 = log2(req.tp_frame_size);
+
+	/* remaining sockaddr_ll fields stay zero from the initialiser */
+	ll.sll_family = PF_PACKET;
+	ll.sll_protocol = htons(ETH_P_ALL);
+	ll.sll_ifindex = if_nametoindex(interface_name);
+
+	noqdisc = 1;
+	ret = setsockopt(sfd, SOL_PACKET, PACKET_QDISC_BYPASS,
+			 &noqdisc, sizeof(noqdisc));
+	lassert(ret == 0);
+
+	ret = bind(sfd, (struct sockaddr *)&ll, sizeof(ll));
+	lassert(ret == 0);
+
+	setup_tx_frame();
+
+	return tqp;
+}
+
+/* Return the frame following frame inside the same V3 block. */
+static struct tpacket3_hdr *tp3_next_frame(struct tpacket3_hdr *frame)
+{
+	return (struct tpacket3_hdr *)((char *)frame + frame->tp_next_offset);
+}
+
+/*
+ * rx hook for V3: hand out the next batch of at most BATCH_SIZE frames
+ * from the current block.  The frame headers are stored in rxq->frames[]
+ * and the batch is returned as the index range [0, *end) into that array;
+ * *end == 0 means no block was ready.  Adds the batch size to rx_npkts.
+ *
+ * A block holding more than BATCH_SIZE packets is consumed over several
+ * calls: rxq->npkts counts the packets still to be handed out and
+ * rxq->last_frame remembers the last frame returned.  last_frame is reset
+ * to NULL with the final batch, which is what allows tp3_rx_release() to
+ * give the block back to the kernel.
+ */
+static void tp3_rx(void *queue_pair, unsigned int *start, unsigned int *end)
+{
+	struct tpacket3_rx_queue *rxq =
+		&((struct tp3_queue_pair *)queue_pair)->rx;
+	struct tpacket3_hdr *frame;
+	unsigned int i, batch;
+
+	*start = 0;
+	*end = 0;
+
+	if (rxq->last_frame) {
+		/* continue in the current block */
+		frame = tp3_next_frame(rxq->last_frame);
+	} else {
+		struct tpacket_block_desc *bd;
+
+		bd = (struct tpacket_block_desc *)
+			((char *)rxq->ring +
+			 ((rxq->last_used_idx & (rxq->ring_size - 1))
+			  << rxq->block_size_log2));
+		if ((bd->hdr.bh1.block_status & TP_STATUS_USER) !=
+		    TP_STATUS_USER)
+			return;
+
+		/* status before data */
+		u_smp_rmb();
+
+		rxq->npkts = bd->hdr.bh1.num_pkts;
+		frame = (struct tpacket3_hdr *)
+			((char *)bd + bd->hdr.bh1.offset_to_first_pkt);
+	}
+
+	batch = rxq->npkts < BATCH_SIZE ? rxq->npkts : BATCH_SIZE;
+
+	for (i = 0; i < batch; i++) {
+		/* never step past the last frame of the batch */
+		if (i)
+			frame = tp3_next_frame(frame);
+		rxq->frames[i] = frame;
+	}
+
+	/*
+	 * Account for the batch on every path.  Without this on the first
+	 * batch of a block, later calls would walk past the block's last
+	 * packet and count packets that were never received.
+	 */
+	rxq->npkts -= batch;
+	rxq->last_frame = rxq->npkts ? frame : NULL;
+
+	*end = batch;
+	rx_npkts += batch;
+}
+
+/*
+ * rx_release hook for V3: once the current block has been handed out
+ * completely (last_frame is NULL), return it to the kernel and advance to
+ * the next block.  While frames of the block are still pending this is a
+ * no-op.  start and end are ignored.
+ */
+static void tp3_rx_release(void *queue_pair, unsigned int start,
+			   unsigned int end)
+{
+	struct tp3_queue_pair *qp = queue_pair;
+	struct tpacket3_rx_queue *rxq = &qp->rx;
+
+	(void)start;
+	(void)end;
+
+	if (!rxq->last_frame) {
+		unsigned int slot = rxq->last_used_idx & (rxq->ring_size - 1);
+		struct tpacket_block_desc *bd;
+
+		bd = (struct tpacket_block_desc *)
+			((char *)rxq->ring + (slot << rxq->block_size_log2));
+		bd->hdr.bh1.block_status = TP_STATUS_KERNEL;
+		rxq->last_used_idx++;
+	}
+}
+
+/*
+ * get_data hook for V3: idx selects an entry of the current batch in
+ * rxq->frames[].  Stores the captured length (tp_snaplen) in *len and
+ * returns a pointer to the packet data, tp_mac bytes into the frame.
+ */
+static void *tp3_get_data(void *queue_pair, unsigned int idx, unsigned int *len)
+{
+	struct tp3_queue_pair *qp = queue_pair;
+	struct tpacket3_hdr *frame = qp->rx.frames[idx];
+	char *pkt = (char *)frame + frame->tp_mac;
+
+	*len = frame->tp_snaplen;
+
+	return pkt;
+}
+
+/*
+ * tx hook for V3: copy the packets of batch [start, end) into consecutive
+ * tx ring frames and mark them TP_STATUS_SEND_REQUEST, stopping early at
+ * the first tx frame that is still queued or being sent.  Then kick the
+ * kernel with a zero-length non-blocking sendto(), call the rx_release
+ * hook and add the number of queued packets to tx_npkts.
+ *
+ * The tx ring is frame based here and uses the tpacket2_queue bookkeeping.
+ */
+static void tp3_tx(void *queue_pair, unsigned int start, unsigned int end)
+{
+ struct tp3_queue_pair *qp = queue_pair;
+ struct tpacket2_queue *txq = &qp->tx;
+ unsigned int len, curr = start;
+ void *data;
+ int ret;
+
+ while (curr != end) {
+ unsigned int idx = txq->last_used_idx & (txq->ring_size - 1);
+ struct tpacket3_hdr *hdr;
+
+ hdr = (struct tpacket3_hdr *)(txq->ring +
+ (idx << txq->frame_size_log2));
+ /* tx ring full: the kernel has not finished with this frame */
+ if (hdr->tp_status &
+ (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING)) {
+ break;
+ }
+
+ data = benchmark.get_data(queue_pair, curr, &len);
+
+ hdr->tp_snaplen = len;
+ hdr->tp_len = len;
+ /* packet data is placed at this fixed offset inside the tx frame */
+ memcpy((char *)hdr + TPACKET3_HDRLEN -
+ sizeof(struct sockaddr_ll), data, len);
+
+ /* data before status */
+ u_smp_wmb();
+
+ hdr->tp_status = TP_STATUS_SEND_REQUEST;
+
+ txq->last_used_idx++;
+ curr++;
+ }
+
+ /* EAGAIN and ENOBUFS are tolerated; any other error is fatal */
+ ret = sendto(qp->sfd, NULL, 0, MSG_DONTWAIT, NULL, 0);
+ if (!(ret >= 0 || errno == EAGAIN || errno == ENOBUFS))
+ lassert(0);
+
+ benchmark.rx_release(queue_pair, start, end);
+
+ tx_npkts += (curr - start);
+}
+
+/*
+ * Put frame index idx back on the umem free stack.  The stack index is
+ * decremented first; there is no check against underflow.
+ */
+static inline void push_free_stack(struct tp4_umem *umem, unsigned long idx)
+{
+	umem->free_stack_idx--;
+	umem->free_stack[umem->free_stack_idx] = idx;
+}
+
+/*
+ * Take the next frame index from the umem free stack.  The stack index is
+ * incremented afterwards; there is no check against running past the end.
+ */
+static inline unsigned long pop_free_stack(struct tp4_umem *umem)
+{
+	unsigned long frame_idx = umem->free_stack[umem->free_stack_idx];
+
+	umem->free_stack_idx++;
+	return frame_idx;
+}
+
+/*
+ * Allocate a page-aligned area of nbuffers frames of FRAME_SIZE bytes,
+ * register it with the kernel through PACKET_MEMREG on a dedicated
+ * PF_PACKET socket, and pre-fill every frame with the test packet.
+ *
+ * nbuffers must not exceed NUM_BUFFERS, the capacity of free_stack[].
+ * Any failing step terminates the program through lassert().
+ *
+ * Returns a newly allocated struct tp4_umem whose free stack holds the
+ * frame indices 0..nbuffers-1.  Nothing in this file frees it.
+ */
+static struct tp4_umem *alloc_and_register_buffers(size_t nbuffers)
+{
+	struct tpacket_memreg_req req = { .frame_size = FRAME_SIZE };
+	struct tp4_umem *umem;
+	char *frame;
+	size_t i;
+	int fd, ret;
+	void *bufs;
+
+	/* free_stack[] has room for NUM_BUFFERS entries only */
+	lassert(nbuffers <= NUM_BUFFERS);
+
+	ret = posix_memalign(&bufs, getpagesize(),
+			     nbuffers * req.frame_size);
+	lassert(ret == 0);
+
+	umem = calloc(1, sizeof(*umem));
+	lassert(umem);
+	fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
+	/* 0 is a valid descriptor; only negative values signal failure */
+	lassert(fd >= 0);
+	req.addr = (unsigned long)bufs;
+	req.len = nbuffers * req.frame_size;
+	ret = setsockopt(fd, SOL_PACKET, PACKET_MEMREG, &req, sizeof(req));
+	lassert(ret == 0);
+
+	umem->frame_size = FRAME_SIZE;
+	umem->frame_size_log2 = log2(FRAME_SIZE);
+	umem->buffer = bufs;
+	umem->size = nbuffers * req.frame_size;
+	umem->nframes = nbuffers;
+	umem->mr_fd = fd;
+
+	for (i = 0; i < nbuffers; i++)
+		umem->free_stack[i] = i;
+
+	/* every frame starts out holding the same test packet */
+	frame = bufs;
+	for (i = 0; i < nbuffers; i++) {
+		tx_frame_len = gen_eth_frame(frame, 42);
+		frame += FRAME_SIZE;
+	}
+
+	return umem;
+}
+
+/*
+ * Post dcnt descriptors from d[] to ring q for the kernel.
+ *
+ * Returns 0 on success, or -ENOSPC without touching the ring when fewer
+ * than dcnt entries are free.  The descriptor payload is written first;
+ * after a write barrier the flags are set, from the last descriptor back
+ * to the first, each with TP4_DESC_KERNEL added.
+ */
+static inline int tp4q_enqueue(struct tpacket4_queue *q,
+ const struct tpacket4_desc *d,
+ unsigned int dcnt)
+{
+ unsigned int avail_idx = q->avail_idx;
+ unsigned int i;
+ int j;
+
+ if (q->num_free < dcnt)
+ return -ENOSPC;
+
+ q->num_free -= dcnt;
+
+ /* pass 1: fill in everything except the flags */
+ for (i = 0; i < dcnt; i++) {
+ unsigned int idx = (avail_idx++) & q->ring_mask;
+
+ q->ring[idx].idx = d[i].idx;
+ q->ring[idx].len = d[i].len;
+ q->ring[idx].offset = d[i].offset;
+ q->ring[idx].error = 0;
+ }
+ /* payload before flags */
+ u_smp_wmb();
+
+ /* pass 2: set the flags in reverse order, so the first one goes last */
+ for (j = dcnt - 1; j >= 0; j--) {
+ unsigned int idx = (q->avail_idx + j) & q->ring_mask;
+
+ q->ring[idx].flags = d[j].flags | TP4_DESC_KERNEL;
+ }
+ q->avail_idx += dcnt;
+
+ return 0;
+}
+
+/*
+ * Create and set up a TPACKET_V4 socket on interface_name: register a
+ * buffer area of NUM_BUFFERS frames, create rx and tx descriptor rings of
+ * NUM_DESCS entries each (one mmap(), rx first), enable qdisc bypass, bind
+ * to the interface, optionally enable zero-copy, and post an initial set
+ * of rx descriptors.
+ *
+ * Any failing step terminates the program through lassert().  Returns a
+ * newly allocated struct tp4_queue_pair; nothing in this file frees it.
+ */
+static void *tp4_configure(const char *interface_name)
+{
+ int sfd, noqdisc, ret, ver = TPACKET_V4;
+ struct tpacket_req4 req = {};
+ struct tp4_queue_pair *tqp;
+ struct sockaddr_ll ll;
+ unsigned int i;
+ void *rxring;
+
+ /* create PF_PACKET socket */
+ sfd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
+ lassert(sfd >= 0);
+ ret = setsockopt(sfd, SOL_PACKET, PACKET_VERSION, &ver, sizeof(ver));
+ lassert(ret == 0);
+
+ tqp = calloc(1, sizeof(*tqp));
+ lassert(tqp);
+
+ tqp->sfd = sfd;
+ tqp->interface_name = interface_name;
+
+ tqp->umem = alloc_and_register_buffers(NUM_BUFFERS);
+ lassert(tqp->umem);
+
+ /* both rings refer to the registered buffer area through its fd */
+ req.mr_fd = tqp->umem->mr_fd;
+ req.desc_nr = NUM_DESCS;
+ ret = setsockopt(sfd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));
+ lassert(ret == 0);
+ ret = setsockopt(sfd, SOL_PACKET, PACKET_TX_RING, &req, sizeof(req));
+ lassert(ret == 0);
+
+ /* one mapping covering both descriptor rings, hence the factor 2 */
+ rxring = mmap(0, 2 * req.desc_nr * sizeof(struct tpacket4_desc),
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_LOCKED | MAP_POPULATE, sfd, 0);
+ lassert(rxring != MAP_FAILED);
+
+ tqp->rx.ring = rxring;
+ tqp->rx.num_free = req.desc_nr;
+ tqp->rx.ring_mask = req.desc_nr - 1;
+
+ /* the tx ring starts right behind the last rx descriptor */
+ tqp->tx.ring = &tqp->rx.ring[req.desc_nr];
+ tqp->tx.num_free = req.desc_nr;
+ tqp->tx.ring_mask = req.desc_nr - 1;
+
+ /* NOTE(review): ll is not zeroed, so sll_addr is left uninitialised */
+ ll.sll_family = PF_PACKET;
+ ll.sll_protocol = htons(ETH_P_ALL);
+ ll.sll_ifindex = if_nametoindex(interface_name);
+ ll.sll_hatype = 0;
+ ll.sll_pkttype = 0;
+ ll.sll_halen = 0;
+
+ noqdisc = 1;
+ ret = setsockopt(sfd, SOL_PACKET, PACKET_QDISC_BYPASS,
+ &noqdisc, sizeof(noqdisc));
+ lassert(ret == 0);
+
+ ret = bind(sfd, (struct sockaddr *)&ll, sizeof(ll));
+ lassert(ret == 0);
+
+ /* zero-copy is requested after bind(); the option value is the queue */
+ if (opt_zerocopy > 0) {
+ ret = setsockopt(sfd, SOL_PACKET, PACKET_ZEROCOPY,
+ &opt_zerocopy, sizeof(opt_zerocopy));
+ lassert(ret == 0);
+ }
+
+ /* prime a quarter of the rx ring with buffers 0..n-1, one at a time */
+ for (i = 0; i < (tqp->rx.ring_mask + 1)/4; i++) {
+ struct tpacket4_desc desc = {};
+
+ desc.idx = i;
+ ret = tp4q_enqueue(&tqp->rx, &desc, 1);
+ lassert(ret == 0);
+ }
+
+ return tqp;
+}
+
+/*
+ * rx hook for V4: return the next batch of completed rx descriptors as the
+ * free-running index range [*start, *end); *start == *end means nothing
+ * was ready.
+ *
+ * The candidate batch is the smaller of BATCH_SIZE and the number of
+ * descriptors posted but not yet consumed.  Only the flags of the last
+ * descriptor of that batch are inspected; the batch is handed out all at
+ * once or not at all.
+ */
+static void tp4_rx(void *queue_pair, unsigned int *start, unsigned int *end)
+{
+ struct tpacket4_queue *q = &((struct tp4_queue_pair *)queue_pair)->rx;
+ unsigned int idx, recv_size, last_used = q->last_used_idx;
+ /* descriptors posted to the kernel and not yet taken back */
+ unsigned int uncleared = (q->avail_idx - last_used);
+
+ *start = last_used;
+ *end = last_used;
+ recv_size = (uncleared < BATCH_SIZE) ? uncleared : BATCH_SIZE;
+
+ /* still owned by the kernel: the batch is not complete yet */
+ idx = (last_used + recv_size - 1) & q->ring_mask;
+ if (q->ring[idx].flags & TP4_DESC_KERNEL)
+ return;
+
+ *end += recv_size;
+ rx_npkts += recv_size;
+ /* remembered so tp4_rx_release() can advance last_used_idx */
+ q->num_free = recv_size;
+
+ /* flags before data */
+ u_smp_rmb();
+}
+
+/*
+ * rx_release hook for V4: re-post the descriptors [start, end) to the
+ * kernel.  Each one is copied to the slot at avail_idx and, after a write
+ * barrier, its flags are set to TP4_DESC_KERNEL.  Afterwards last_used_idx
+ * is advanced by the batch size recorded in num_free by tp4_rx(), and
+ * num_free is cleared.
+ */
+static inline void tp4_rx_release(void *queue_pair, unsigned int start,
+ unsigned int end)
+{
+ struct tp4_queue_pair *qp = queue_pair;
+ struct tpacket4_queue *q = &qp->rx;
+ struct tpacket4_desc *src, *dst;
+ unsigned int nitems = end - start;
+
+ while (nitems--) {
+ dst = &q->ring[(q->avail_idx++) & q->ring_mask];
+ src = &q->ring[start++ & q->ring_mask];
+ *dst = *src;
+
+ /* descriptor contents before flags */
+ u_smp_wmb();
+
+ dst->flags = TP4_DESC_KERNEL;
+ }
+
+ q->last_used_idx += q->num_free;
+ q->num_free = 0;
+}
+
+/*
+ * get_data hook for V4: look up the rx descriptor for free-running index
+ * idx, store its length in *len and return the address of the packet
+ * inside the buffer area (frame start plus descriptor offset).
+ */
+static inline void *tp4_get_data(void *queue_pair, unsigned int idx,
+				 unsigned int *len)
+{
+	struct tp4_queue_pair *qp = (struct tp4_queue_pair *)queue_pair;
+	struct tpacket4_desc *desc = &qp->rx.ring[idx & qp->rx.ring_mask];
+	struct tp4_umem *umem = qp->umem;
+
+	*len = desc->len;
+
+	return (char *)umem->buffer + (desc->idx << umem->frame_size_log2)
+		+ desc->offset;
+}
+
+
+/*
+ * get_data_desc hook for V4 l2fwd: report length and offset of the rx
+ * descriptor for free-running index idx and return its buffer index.
+ */
+static inline unsigned long tp4_get_data_desc(void *queue_pair,
+					      unsigned int idx,
+					      unsigned int *len,
+					      unsigned short *offset)
+{
+	struct tp4_queue_pair *qp = queue_pair;
+	struct tpacket4_desc *desc = &qp->rx.ring[idx & qp->rx.ring_mask];
+
+	*offset = desc->offset;
+	*len = desc->len;
+
+	return desc->idx;
+}
+
+/*
+ * get_data_desc hook for V4 txonly: take a buffer index from the umem free
+ * stack; the packet is the pre-filled test frame at offset 0.  idx is
+ * ignored.
+ */
+static inline unsigned long tp4_get_data_desc_dummy(void *queue_pair,
+						    unsigned int idx,
+						    unsigned int *len,
+						    unsigned short *offset)
+{
+	struct tp4_queue_pair *qp = queue_pair;
+	struct tp4_umem *umem = qp->umem;
+
+	(void)idx;
+
+	*offset = 0;
+	*len = tx_frame_len;
+
+	return pop_free_stack(umem);
+}
+
+/*
+ * set_data_desc hook for V4 l2fwd: store buffer index didx in the rx
+ * descriptor for free-running index idx.
+ */
+static inline void tp4_set_data_desc(void *queue_pair, unsigned int idx,
+				     unsigned long didx)
+{
+	struct tp4_queue_pair *qp = queue_pair;
+	struct tpacket4_queue *rxq = &qp->rx;
+
+	rxq->ring[idx & rxq->ring_mask].idx = didx;
+}
+
+/*
+ * set_data_desc hook for V4 txonly: return buffer index didx to the umem
+ * free stack.  idx is ignored.
+ */
+static inline void tp4_set_data_desc_dummy(void *queue_pair, unsigned int idx,
+					   unsigned long didx)
+{
+	struct tp4_queue_pair *qp = queue_pair;
+	struct tp4_umem *umem = qp->umem;
+
+	(void)idx;
+
+	push_free_stack(umem, didx);
+}
+
+/*
+ * tx hook for V4, working on the batch [start, end):
+ *  1. reclaim up to end - start tx descriptors the kernel has finished
+ *     with, remembering their buffer indices in cleared[] and counting
+ *     them in tx_npkts;
+ *  2. post as many new tx descriptors as there is room for (at most
+ *     end - start), obtaining each buffer through the get_data_desc hook
+ *     and handing reclaimed buffers back through the set_data_desc hook;
+ *  3. call the rx_release hook for [start, start + ncleared);
+ *  4. kick the kernel with a zero-length non-blocking sendto().
+ */
+static void tp4_tx(void *queue_pair, unsigned int start, unsigned int end)
+{
+ struct tp4_queue_pair *qp = (struct tp4_queue_pair *)queue_pair;
+ struct tpacket4_queue *q = &qp->tx;
+ unsigned int i, aidx, uidx, send_size, s, entries, ncleared = 0;
+ unsigned long cleared[BATCH_SIZE];
+ int ret;
+
+ entries = end - start;
+
+ /* nothing can be reclaimed while the tx ring is completely free */
+ if (q->num_free != NUM_DESCS) {
+ for (i = 0; i < entries; i++) {
+ uidx = q->last_used_idx & q->ring_mask;
+ /* stop at the first descriptor still owned by the kernel */
+ if (q->ring[uidx].flags & TP4_DESC_KERNEL)
+ break;
+
+ q->last_used_idx++;
+ cleared[i] = q->ring[uidx].idx;
+ q->num_free++;
+ ncleared++;
+ }
+ }
+
+ /* packets are counted when their descriptors complete */
+ tx_npkts += ncleared;
+
+ send_size = (q->num_free < entries) ? q->num_free : entries;
+ i = 0;
+ s = start;
+ q->num_free -= send_size;
+
+ while (send_size--) {
+ aidx = q->avail_idx++ & q->ring_mask;
+
+ q->ring[aidx].idx = benchmark.get_data_desc(
+ qp, s, &q->ring[aidx].len,
+ &q->ring[aidx].offset);
+ /*
+ * NOTE(review): s only advances while reclaimed buffers remain, so
+ * with send_size > ncleared the later calls to get_data_desc see the
+ * same s again -- confirm this is intended.
+ */
+ if (i < ncleared)
+ benchmark.set_data_desc(qp, s++, cleared[i++]);
+
+ /* descriptor contents before flags */
+ u_smp_wmb();
+
+ q->ring[aidx].flags = TP4_DESC_KERNEL;
+ }
+
+ benchmark.rx_release(queue_pair, start, start + ncleared);
+
+ /* EAGAIN and ENOBUFS are tolerated; any other error is fatal */
+ ret = sendto(qp->sfd, NULL, 0, MSG_DONTWAIT, NULL, 0);
+ if (!(ret >= 0 || errno == EAGAIN || errno == ENOBUFS))
+ lassert(0);
+}
+
+/*
+ * Hook tables, indexed as benchmarks[enum tpacket_version][enum
+ * benchmark_type].  Each version lists rxdrop, txonly and l2fwd, in that
+ * order.
+ *
+ * rxdrop installs the version's rx_release routine as the tx hook, so a
+ * received batch is handed straight back to the kernel.  txonly uses
+ * rx_dummy, which always reports a full batch.
+ */
+static struct benchmark benchmarks[3][3] = {
+ { /* V2 */
+ /* rxdrop */
+ { .configure = tp2_configure,
+ .rx = tp2_rx,
+ .get_data = NULL,
+ .get_data_desc = NULL,
+ .set_data_desc = NULL,
+ .process = NULL,
+ .rx_release = NULL,
+ .tx = tp2_rx_release,
+ },
+ /* txonly */
+ { .configure = tp2_configure,
+ .rx = rx_dummy,
+ .get_data = get_data_dummy,
+ .get_data_desc = NULL,
+ .set_data_desc = NULL,
+ .process = NULL,
+ .rx_release = rx_release_dummy,
+ .tx = tp2_tx,
+ },
+ /* l2fwd */
+ { .configure = tp2_configure,
+ .rx = tp2_rx,
+ .get_data = tp2_get_data,
+ .get_data_desc = NULL,
+ .set_data_desc = NULL,
+ .process = process_swap_mac,
+ .rx_release = tp2_rx_release,
+ .tx = tp2_tx,
+ }
+ },
+ { /* V3 */
+ /* rxdrop */
+ { .configure = tp3_configure,
+ .rx = tp3_rx,
+ .get_data = NULL,
+ .get_data_desc = NULL,
+ .set_data_desc = NULL,
+ .process = NULL,
+ .rx_release = NULL,
+ .tx = tp3_rx_release,
+ },
+ /* txonly */
+ { .configure = tp3_configure,
+ .rx = rx_dummy,
+ .get_data = get_data_dummy,
+ .get_data_desc = NULL,
+ .set_data_desc = NULL,
+ .process = NULL,
+ .rx_release = rx_release_dummy,
+ .tx = tp3_tx,
+ },
+ /* l2fwd */
+ { .configure = tp3_configure,
+ .rx = tp3_rx,
+ .get_data = tp3_get_data,
+ .set_data_desc = NULL,
+ .get_data_desc = NULL,
+ .process = process_swap_mac,
+ .rx_release = tp3_rx_release,
+ .tx = tp3_tx,
+ }
+ },
+ { /* V4 */
+ /* rxdrop */
+ { .configure = tp4_configure,
+ .rx = tp4_rx,
+ .get_data = NULL,
+ .get_data_desc = NULL,
+ .set_data_desc = NULL,
+ .process = NULL,
+ .rx_release = NULL,
+ .tx = tp4_rx_release,
+ },
+ /* txonly: buffers come from and go back to the umem free stack */
+ { .configure = tp4_configure,
+ .rx = rx_dummy,
+ .get_data = NULL,
+ .get_data_desc = tp4_get_data_desc_dummy,
+ .set_data_desc = tp4_set_data_desc_dummy,
+ .process = NULL,
+ .rx_release = rx_release_dummy,
+ .tx = tp4_tx,
+ },
+ /* l2fwd */
+ { .configure = tp4_configure,
+ .rx = tp4_rx,
+ .get_data = tp4_get_data,
+ .get_data_desc = tp4_get_data_desc,
+ .set_data_desc = tp4_set_data_desc,
+ .process = process_swap_mac,
+ .rx_release = tp4_rx_release,
+ .tx = tp4_tx,
+ }
+ }
+};
+
+/*
+ * Return the hook table for the given tpacket version and benchmark type.
+ * Both arguments are used directly as array indices and are not checked.
+ */
+static struct benchmark *get_benchmark(enum tpacket_version ver,
+				       enum benchmark_type type)
+{
+	struct benchmark *per_version = benchmarks[ver];
+
+	return per_version + type;
+}
+
+
+
+
+/*
+ * Long options for getopt_long().  Each one maps to the short option
+ * character handled in parse_command_line().
+ */
+static struct option long_options[] = {
+ {"version", required_argument, 0, 'v'},
+ {"rxdrop", no_argument, 0, 'r'},
+ {"txonly", no_argument, 0, 't'},
+ {"l2fwd", no_argument, 0, 'l'},
+ {"zerocopy", required_argument, 0, 'z'},
+ {"interface", required_argument, 0, 'i'},
+ {0, 0, 0, 0} /* terminator */
+};
+
+/* Print the option summary to stderr and exit with EXIT_FAILURE. */
+static void usage(void)
+{
+	static const char help[] =
+		" Usage: tpbench [OPTIONS]\n"
+		" Options:\n"
+		" -v, --version=n Use tpacket version n (default 4)\n"
+		" -r, --rxdrop Discard all incoming packets (default)\n"
+		" -t, --txonly Only send packets\n"
+		" -l, --l2fwd MAC swap L2 forwarding\n"
+		" -z, --zerocopy=n Enable zero-copy on queue n\n"
+		" -i, --interface=n Run on interface n\n"
+		"\n";
+
+	fputs(help, stderr);
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * Parse s as a base-10 integer.  Returns true and stores the value in
+ * *out only when the whole string is a number that fits in an int.
+ */
+static bool parse_int(const char *s, int *out)
+{
+	char *end;
+	long val;
+
+	errno = 0;
+	val = strtol(s, &end, 10);
+	if (end == s || *end != '\0' || errno == ERANGE ||
+	    val < INT_MIN || val > INT_MAX)
+		return false;
+
+	*out = (int)val;
+	return true;
+}
+
+/*
+ * Parse the command line into the opt_* globals.
+ *
+ * Prints an error and/or the usage text and exits (through usage()) on an
+ * unknown option, a version outside [2,4], a non-numeric --version or
+ * --zerocopy argument, zero-copy combined with a version other than 4, or
+ * an interface name that does not exist.
+ */
+static void parse_command_line(int argc, char **argv)
+{
+	int option_index, c, version = 0;
+
+	/* getopt's own diagnostics are suppressed; usage() is shown instead */
+	opterr = 0;
+
+	for (;;) {
+		c = getopt_long(argc, argv, "v:rtlz:i:", long_options,
+				&option_index);
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case 'v':
+			if (!parse_int(optarg, &version) ||
+			    version < 2 || version > 4) {
+				fprintf(stderr,
+					"ERROR: version has to be [2,4]\n");
+				usage();
+			}
+			/* enum tpacket_version starts at 0 for version 2 */
+			opt_tpver = version - 2;
+			break;
+		case 'r':
+			opt_bench = BENCH_RXDROP;
+			break;
+		case 't':
+			opt_bench = BENCH_TXONLY;
+			break;
+		case 'l':
+			opt_bench = BENCH_L2FWD;
+			break;
+		case 'z':
+			/* atoi() would silently turn garbage into 0 */
+			if (!parse_int(optarg, &opt_zerocopy)) {
+				fprintf(stderr,
+					"ERROR: zerocopy queue has to be a number\n");
+				usage();
+			}
+			break;
+		case 'i':
+			opt_if = optarg;
+			break;
+		default:
+			usage();
+		}
+	}
+
+	if (opt_zerocopy > 0 && opt_tpver != PV4) {
+		fprintf(stderr, "ERROR: version 4 required for zero-copy\n");
+		usage();
+	}
+
+	/* if_nametoindex() returns 0 when the interface is unknown */
+	if (if_nametoindex(opt_if) == 0) {
+		fprintf(stderr, "ERROR: interface \"%s\" does not exist\n",
+			opt_if);
+		usage();
+	}
+}
+
+/*
+ * Print a one-line description of the selected benchmark to stdout:
+ * interface, tpacket version, benchmark type and a zero-copy marker.
+ * When running is true, "running..." is appended and stdout is flushed.
+ * No newline is printed.
+ */
+static void print_benchmark(bool running)
+{
+	const char *bench_str;
+
+	switch (opt_bench) {
+	case BENCH_RXDROP:
+		bench_str = "rxdrop";
+		break;
+	case BENCH_TXONLY:
+		bench_str = "txonly";
+		break;
+	case BENCH_L2FWD:
+		bench_str = "l2fwd";
+		break;
+	default:
+		bench_str = "INVALID";
+		break;
+	}
+
+	/* opt_tpver is zero-based, PV2 == 0 */
+	printf("%s v%d %s ", opt_if, opt_tpver + 2, bench_str);
+	printf("%s", opt_zerocopy > 0 ? "zc " : " ");
+
+	if (!running)
+		return;
+
+	printf("running...");
+	fflush(stdout);
+}
+
+/*
+ * SIGINT handler: print the benchmark description, the elapsed time and
+ * the rx/tx packet counts and rates, then exit with EXIT_SUCCESS.
+ *
+ * NOTE(review): printf() and exit() are not async-signal-safe; this is
+ * presumably accepted for a sample program -- confirm.
+ */
+static void sigdie(int sig)
+{
+ unsigned long stop_time = get_nsecs();
+ long dt = stop_time - start_time; /* elapsed time in nanoseconds */
+ (void)sig;
+
+ double rx_pps = rx_npkts * 1000000000. / dt;
+ double tx_pps = tx_npkts * 1000000000. / dt;
+
+ /* return to the start of the line left open by print_benchmark(true) */
+ printf("\r");
+ print_benchmark(false);
+ printf("duration %4.2fs rx: %16lupkts @ %16.2fpps tx: %16lupkts @ %16.2fpps.\n",
+ dt / 1000000000., rx_npkts, rx_pps, tx_npkts, tx_pps);
+
+ exit(EXIT_SUCCESS);
+}
+
+/*
+ * Install the SIGINT handler that prints the results, parse the command
+ * line, select the hook table and run the benchmark.  run_benchmark()
+ * loops forever, so the program ends in sigdie() rather than by returning
+ * from main().
+ */
+int main(int argc, char **argv)
+{
+ signal(SIGINT, sigdie);
+ parse_command_line(argc, argv);
+ print_benchmark(true);
+ /* copy the selected hooks into the global used by the hot path */
+ benchmark = *get_benchmark(opt_tpver, opt_bench);
+ start_time = get_nsecs();
+ run_benchmark(opt_if);
+
+ return 0;
+}
--
2.11.0
Powered by blists - more mailing lists