Skip to content

Instantly share code, notes, and snippets.

@giannitedesco
Created June 26, 2013 00:20
Show Gist options
  • Save giannitedesco/5863705 to your computer and use it in GitHub Desktop.
Save giannitedesco/5863705 to your computer and use it in GitHub Desktop.
TPACKET_V3 mmap packet sockets, showing off flexible frame sizes and multi-process hash fanout
/* Copyright (c) 2013 Gianni Tedesco
* Released under the terms of the GNU GPL version 3
* mmap() packet socket reception (TPACKET_V3 RX ring)
*/
#ifndef __linux__
#error "Are you loco? This is Linux only!"
#endif
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <assert.h>
#include <signal.h>
#define __USE_XOPEN
#include <sys/poll.h>
#include <sys/socket.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <features.h> /* for the glibc version number */
#if __GLIBC__ >= 2 && __GLIBC_MINOR >= 1
#include <netpacket/packet.h>
#include <net/ethernet.h> /* the L2 protocols */
#else
#include <asm/types.h>
#endif
#include <linux/if.h>
#include <linux/if_packet.h>
#include <linux/if_ether.h> /* The L2 protocols */
#include <string.h>
#include <netinet/in.h>
//#include <asm/system.h>
#include <rxring.h>
/* Per-block private area. It is empty; only its size is used, as the
 * tp_sizeof_priv value passed with the PACKET_RX_RING request in rx_ring(). */
struct priv {
/* unused */
};
/* State of one receive ring; the public handle rxring_t points at this. */
struct _rxring {
void *user; /* opaque pointer handed back as first argument of cb */
rx_cb_t cb; /* per-packet callback; may be NULL, in which case it is skipped */
uint8_t *map; /* base address of the mmap()ed ring */
size_t map_sz; /* size of the mapping in bytes (block_sz * nr_blocks) */
sig_atomic_t cancel; /* set to 1 by rxring_cancel_mainloop() to stop the main loop */
unsigned int r_idx; /* index of the next block the main loop will read */
unsigned int nr_blocks; /* number of blocks in the ring */
unsigned int block_sz; /* size of one block in bytes */
int ifindex; /* interface index the socket is bound to; 0 means any interface */
int fd; /* the packet socket */
};
/* Number of blocks requested for the RX ring (used as tp_block_nr in rx_ring()). */
#define NUM_BLOCKS 2049
/* 1. Open the packet socket.
 *
 * Creates a raw PF_PACKET socket for all protocols and stores the descriptor
 * in rx->fd (the failing return value of socket() is stored too).
 * Returns 1 on success, 0 on failure after reporting via perror().
 */
static int packet_socket(rxring_t rx)
{
	int sock = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));

	rx->fd = sock;
	if (sock >= 0)
		return 1;

	perror("socket()");
	return 0;
}
/* 2. Set TPACKET_V3.
 *
 * Selects the TPACKET_V3 ring format on rx->fd via PACKET_VERSION.
 * Returns 1 on success, 0 on failure after reporting via perror().
 */
static int set_v3(rxring_t rx)
{
	const int version = TPACKET_V3;
	int rc;

	rc = setsockopt(rx->fd, SOL_PACKET, PACKET_VERSION,
			&version, sizeof(version));
	if (rc != 0) {
		perror("setsockopt(TPACKET_V3)");
		return 0;
	}

	return 1;
}
/* 3. Set up the fd for the mmap() ring buffer.
 *
 * Requests an RX ring of NUM_BLOCKS blocks, each four pages long, with a
 * frame size of TPACKET_ALIGNMENT << 7 and a block retire timeout of 64.
 * On success the resulting geometry is recorded in rx->map_sz,
 * rx->nr_blocks and rx->block_sz for the later mmap() and block walk.
 * Returns 1 on success, 0 on failure after reporting via perror().
 */
static int rx_ring(rxring_t rx)
{
	const unsigned int blk_bytes = getpagesize() << 2;
	const unsigned int frm_bytes = TPACKET_ALIGNMENT << 7;
	const unsigned int blk_count = NUM_BLOCKS;
	struct tpacket_req3 req = {
		.tp_block_size = blk_bytes,
		.tp_block_nr = blk_count,
		.tp_frame_size = frm_bytes,
		/* frames per block, times the number of blocks */
		.tp_frame_nr = blk_bytes / frm_bytes * blk_count,
		.tp_retire_blk_tov = 64,
		.tp_sizeof_priv = sizeof(struct priv),
		/* no features requested; TP_REQ_FILL_RXHASH is left off */
		.tp_feature_req_word = 0,
	};

	if (setsockopt(rx->fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req)) != 0) {
		perror("setsockopt(PACKET_RX_RING)");
		return 0;
	}

	rx->map_sz = req.tp_block_size * req.tp_block_nr;
	rx->nr_blocks = req.tp_block_nr;
	rx->block_sz = req.tp_block_size;
	return 1;
}
/* 4. Bind the socket to the interface we receive on.
 *
 * ifname: name of the interface to capture from, or NULL for "any"
 *         interface (ifindex 0).
 *
 * The resolved index is stored in rx->ifindex and the socket is bound to it
 * for all protocols (ETH_P_ALL).
 * Returns 1 on success, 0 on failure after printing a diagnostic.
 */
static int bind_if(rxring_t rx, const char *ifname)
{
	struct sockaddr_ll sll;

	if (ifname) {
		struct ifreq ifr;
		int n;

		memset(&ifr, 0, sizeof(ifr));
		n = snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", ifname);
		/* A truncated name could match a different interface, so
		 * reject it instead of silently binding to the wrong one. */
		if (n < 0 || (size_t)n >= sizeof(ifr.ifr_name)) {
			fprintf(stderr, "%s: interface name too long\n", ifname);
			return 0;
		}

		if (ioctl(rx->fd, SIOCGIFINDEX, &ifr)) {
			perror("ioctl");
			return 0;
		}
		rx->ifindex = ifr.ifr_ifindex;
	} else {
		/* interface "any" */
		rx->ifindex = 0;
	}

	memset(&sll, 0, sizeof(sll));
	sll.sll_family = PF_PACKET;
	sll.sll_protocol = htons(ETH_P_ALL);
	sll.sll_ifindex = rx->ifindex;

	if (bind(rx->fd, (struct sockaddr *)&sll, sizeof(sll))) {
		perror("bind()");
		return 0;
	}

	return 1;
}
/* 5. Finally mmap() the ring.
 *
 * Maps rx->map_sz bytes of the socket read/write and shared, after printing
 * the size in MiB. The mmap() result is stored in rx->map even when it is
 * MAP_FAILED.
 * Returns 1 on success, 0 on failure after reporting via perror().
 */
static int map_ring(rxring_t rx)
{
	void *base;

	printf("mapping %zu MiB ring buffer\n", rx->map_sz >> 20);

	base = mmap(NULL, rx->map_sz, PROT_READ | PROT_WRITE,
		    MAP_SHARED, rx->fd, 0);
	rx->map = base;
	if (base != MAP_FAILED)
		return 1;

	perror("mmap()");
	return 0;
}
/* Create a receive ring.
 *
 * ifname: interface to bind to (forwarded to bind_if()).
 * cb:     callback stored for per-packet delivery.
 * user:   opaque pointer stored alongside cb.
 *
 * Runs the setup steps in order: socket, TPACKET_V3, RX ring request, bind,
 * mmap. Returns the new handle, or NULL if allocation or any step fails; on
 * failure the socket (if it was opened) is closed and the handle is freed.
 */
rxring_t rxring_init(const char *ifname, rx_cb_t cb, void *user)
{
	struct _rxring *ring = calloc(1, sizeof(*ring));

	if (ring == NULL)
		return NULL;

	/* No descriptor to close yet if the socket itself cannot be opened. */
	if (!packet_socket(ring)) {
		free(ring);
		return NULL;
	}

	/* && short-circuits, so the steps run in order and stop at the
	 * first failure. */
	if (set_v3(ring) && rx_ring(ring) &&
	    bind_if(ring, ifname) && map_ring(ring)) {
		ring->cb = cb;
		ring->user = user;
		return ring;
	}

	close(ring->fd);
	free(ring);
	return NULL;
}
/* Join the hash-based packet fanout group `id`, with defragmentation.
 *
 * The PACKET_FANOUT argument carries the group id in its low 16 bits and the
 * fanout type together with its flags in the high 16 bits. The DEFRAG flag
 * therefore has to be combined with the type before shifting; OR-ing it in
 * unshifted would alter the group id instead of requesting defragmentation.
 *
 * Returns 1 on success, 0 on failure after reporting via perror().
 */
int rxring_fanout_hash(rxring_t rx, uint16_t id)
{
	const unsigned int type_flags =
		PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG;
	/* Unsigned arithmetic: the flag lands in bit 31, which would
	 * overflow a signed int shift. */
	unsigned int val = (type_flags << 16) | id;

	if (setsockopt(rx->fd, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val))) {
		perror("setsockopt(PACKET_FANOUT)");
		return 0;
	}

	return 1;
}
/* Walk every packet in one ring block.
 *
 * Starts at the block's offset_to_first_pkt and visits num_pkts packet
 * headers, following tp_next_offset from one to the next. For each packet a
 * trace line with its index and timestamp is printed, and the callback (if
 * one is set) is invoked with the data starting at tp_mac and the length
 * tp_snaplen. The callback's return value is ignored.
 */
static void do_block(rxring_t rx, struct tpacket_block_desc *desc)
{
	const uint8_t *cursor = (uint8_t *)desc + desc->hdr.bh1.offset_to_first_pkt;
	const unsigned int total = desc->hdr.bh1.num_pkts;
	unsigned int n = 0;

	while (n < total) {
		struct tpacket3_hdr *pkt = (struct tpacket3_hdr *)cursor;

		printf("packet %u/%u %u.%u\n",
		       n, total, pkt->tp_sec, pkt->tp_nsec);

		/* packet */
		if (rx->cb)
			(*rx->cb)(rx->user, cursor + pkt->tp_mac, pkt->tp_snaplen);

		cursor += pkt->tp_next_offset;
		__sync_synchronize();
		n++;
	}
}
/* Receive loop: hand each filled block to do_block() and give it back to
 * the kernel, until rxring_cancel_mainloop() has been called.
 *
 * Blocks are consumed in ring order starting at rx->r_idx. While the current
 * block is not yet marked TP_STATUS_USER the loop sleeps in poll(); the
 * cancel flag is re-checked around each poll() so a cancel request is
 * honoured even when no traffic arrives.
 */
void rxring_mainloop(rxring_t rx)
{
	struct pollfd pfd;

	pfd.fd = rx->fd;
	pfd.events = POLLIN | POLLERR;
	pfd.revents = 0;

	while (!rx->cancel) {
		struct tpacket_block_desc *desc;

		/* Compute the byte offset first, then cast: a cast binds
		 * tighter than '+', so without the parentheses the offset
		 * would be scaled by sizeof(*desc). */
		desc = (struct tpacket_block_desc *)
			(rx->map + (size_t)rx->r_idx * rx->block_sz);

		while (!(desc->hdr.bh1.block_status & TP_STATUS_USER)) {
			if (rx->cancel)
				return;
			poll(&pfd, 1, -1);
		}

		/* walk block */
		do_block(rx, desc);

		/* return the block to the kernel */
		desc->hdr.bh1.block_status = TP_STATUS_KERNEL;
		__sync_synchronize();

		rx->r_idx = (rx->r_idx + 1) % rx->nr_blocks;
	}
}
/* Ask rxring_mainloop() to stop by raising the cancel flag it tests. */
void rxring_cancel_mainloop(rxring_t rx)
{
	rx->cancel = 1;
}
/* Release a ring: unmap the buffer, close the socket and free the handle.
 * Passing NULL is allowed and does nothing. */
void rxring_free(rxring_t rx)
{
	if (rx == NULL)
		return;

	munmap(rx->map, rx->map_sz);
	close(rx->fd);
	free(rx);
}
/* Copyright (c) 2013 Gianni Tedesco */
#ifndef _RX_RING_H
#define _RX_RING_H

/* The declarations below use size_t, uint8_t and uint16_t; include their
 * headers here so this header does not depend on the includer's order. */
#include <stddef.h>
#include <stdint.h>

/* Opaque handle to a receive ring. */
typedef struct _rxring *rxring_t;

/* Per-packet callback: u is the user pointer given to rxring_init(),
 * buf/len describe the captured packet data. */
typedef int (*rx_cb_t)(void *u, const uint8_t *buf, size_t len);

/* Create a ring bound to ifname; returns NULL on failure. */
rxring_t rxring_init(const char *ifname, rx_cb_t cb, void *user);

/* Join hash fanout group id; returns 1 on success, 0 on failure. */
int rxring_fanout_hash(rxring_t rx, uint16_t id);

/* Process blocks until rxring_cancel_mainloop() is called. */
void rxring_mainloop(rxring_t rx);

/* Request that rxring_mainloop() return. */
void rxring_cancel_mainloop(rxring_t rx);

/* Unmap, close and free the ring; NULL is accepted. */
void rxring_free(rxring_t rx);

#endif /* _RX_RING_H */
/* Copyright (c) 2013 Gianni Tedesco
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>
#include <string.h>
#include <ctype.h>
#include <rxring.h>
#include <pkt.h>
/* Program name used in messages; main() replaces it with argv[0] when available. */
static const char *cmd = "rx-test";
/* The receive ring created in main(). */
static rxring_t rx;
/* Write a hex dump of tmp[0..len) to f.
 *
 * f:    output stream; nothing is written if it is NULL.
 * tmp:  bytes to dump.
 * len:  number of bytes; nothing is written if it is 0.
 * llen: bytes per output line; 0 selects the default of 16.
 *
 * Each line holds the offset in hex, the bytes as characters (non-printable
 * ones shown as '.', padded with spaces to llen columns), then the bytes as
 * two-digit hex values. A blank line follows the dump.
 */
static void hex_dumpf(FILE *f, const uint8_t *tmp, size_t len, size_t llen)
{
	size_t off, col, chunk;

	if (f == NULL || len == 0)
		return;

	if (llen == 0)
		llen = 0x10;

	for (off = 0; off < len; off += chunk, tmp += chunk) {
		/* the last line may be shorter than llen */
		chunk = (off + llen > len) ? (len - off) : llen;

		fprintf(f, " | %05zx : ", off);

		/* character column, padded out to the full line width */
		for (col = 0; col < llen; col++) {
			if (col >= chunk)
				fputc(' ', f);
			else
				fputc(isprint(tmp[col]) ? tmp[col] : '.', f);
		}

		/* hex column */
		for (col = 0; col < chunk; col++)
			fprintf(f, " %02x", tmp[col]);

		fputc('\n', f);
	}

	fputc('\n', f);
}
/* Packet callback for the test program: hex-dumps the packet to stdout
 * using the default line width. The user pointer is not used.
 * Always returns 1. */
static int cb(void *u, const uint8_t *buf, size_t len)
{
	(void)u;

	hex_dumpf(stdout, buf, len, 0);
	return 1;
}
/* Test program entry point.
 *
 * Usage: <cmd> <ifname>
 *
 * Creates a ring on the named interface, joins hash fanout group 0x1234 and
 * runs the receive loop, dumping every packet through cb(). Returns
 * EXIT_FAILURE on a usage error or if setup fails, EXIT_SUCCESS once the
 * main loop has returned.
 */
int main(int argc, char **argv)
{
	if (argc > 0)
		cmd = argv[0];

	if (argc < 2) {
		fprintf(stderr, "%s: Usage:\n\t%s <ifname>\n\n", cmd, cmd);
		return EXIT_FAILURE;
	}

	rx = rxring_init(argv[1], cb, NULL);
	if (NULL == rx)
		return EXIT_FAILURE;

	if (!rxring_fanout_hash(rx, 0x1234)) {
		/* release the socket and mapping before bailing out */
		rxring_free(rx);
		rx = NULL;
		return EXIT_FAILURE;
	}

	rxring_mainloop(rx);
	printf("%s: OK\n", cmd);

	rxring_free(rx);
	return EXIT_SUCCESS;
}
@westtrd
Copy link

westtrd commented Jan 22, 2014

Interesting sample
Can you show me the technique for joining a packet socket to specific multicast groups?

Regards

@Shengliang
Copy link

rxring.c:232: parentheses "()" are needed so that the addition is done before the cast to the (struct tpacket_block_desc *) type:
desc = (struct tpacket_block_desc *)
(rx->map + rx->r_idx * rx->block_sz);

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment