Commit ccd1962e authored by Leonardo Lai's avatar Leonardo Lai

major rework of l4 switching

primitive support to reuseport and reuseaddr
parents 03dc494d 16f49d51
......@@ -35,3 +35,5 @@ This software uses code from:
License: BSD 3-Clause
- inih: https://github.com/benhoyt/inih
License: BSD 3-Clause
- list: https://github.com/clibs/list
License: MIT
......@@ -38,7 +38,7 @@ The list of hardware officially supported by DPDK is available [here](https://co
## Install Dependencies
UDPDK requires:
- DPDK 19.11
- DPDK 20.05
- inih (any)
They are already included in this repository as submodules, so pull them:
......@@ -56,9 +56,9 @@ cd dpdk/usertools
```
From the menu, do the following:
1. Compile for your specific arch, usually `x86_64-native-linuxapp-gcc`
2. Load the `igb` module
2. Load the `vfio` module
3. Configure hugepages (e.g. 1024M for each NUMA node)
4. Bind the NIC to igb driver, specifying its PCI address
4. Bind the NIC to vfio driver, specifying its PCI address
### inih
......
......@@ -17,7 +17,7 @@ endif
SRCS= main.c
LIBS+= -L${UDPDK_PATH}/udpdk -Wl,--whole-archive,-ludpdk,--no-whole-archive
LIBS+= -Wl,--whole-archive,-ldpdk,--no-whole-archive
LIBS+= -L${RTE_SDK}/${RTE_TARGET}/lib -Wl,--whole-archive,-ldpdk,--no-whole-archive
LIBS+= -Wl,--no-whole-archive -lrt -lm -ldl -lcrypto -pthread -lnuma
CFLAGS += $(WERROR_FLAGS) -O3
......@@ -28,4 +28,4 @@ all:
.PHONY: clean
clean:
rm -f *.o ${TARGET}
\ No newline at end of file
rm -f *.o ${TARGET}
......@@ -17,7 +17,7 @@ endif
SRCS= main.c
LIBS+= -L${UDPDK_PATH}/udpdk -Wl,--whole-archive,-ludpdk,--no-whole-archive
LIBS+= -Wl,--whole-archive,-ldpdk,--no-whole-archive
LIBS+= -L${RTE_SDK}/${RTE_TARGET}/lib -Wl,--whole-archive,-ldpdk,--no-whole-archive
LIBS+= -Wl,--no-whole-archive -lrt -lm -ldl -lcrypto -pthread -lnuma
CFLAGS += $(WERROR_FLAGS) -O3
......@@ -28,4 +28,4 @@ all:
.PHONY: clean
clean:
rm -f *.o ${TARGET}
\ No newline at end of file
rm -f *.o ${TARGET}
Sender:
sudo ./pktgen -c ../../config.ini -f send -l 100
Receiver:
sudo ./pktgen -c ../../config.ini -f recv
Note: '-l' is the length of the whole frame, including MAC, IPv4 and UDP headers
Subproject commit 7001c8fdb27357c67147c0a13cb3826e48c0f2bf
Subproject commit e2a234488854fdeee267a2aa582aa082fce01d6e
......@@ -33,21 +33,34 @@ CFLAGS+= --param large-function-growth=1000
DPDK_CFLAGS= -DRTE_MACHINE_CPUFLAG_SSE -DRTE_MACHINE_CPUFLAG_SSE2 -DRTE_MACHINE_CPUFLAG_SSE3
DPDK_CFLAGS+= -DRTE_MACHINE_CPUFLAG_SSSE3 -DRTE_MACHINE_CPUFLAG_SSE4_1 -DRTE_MACHINE_CPUFLAG_SSE4_2
DPDK_CFLAGS+= -DRTE_COMPILE_TIME_CPUFLAGS=RTE_CPUFLAG_SSE,RTE_CPUFLAG_SSE2,RTE_CPUFLAG_SSE3,RTE_CPUFLAG_SSSE3,RTE_CPUFLAG_SSE4_1,RTE_CPUFLAG_SSE4_2
DPDK_CFLAGS+= -I${UDPDK_DPDK}/include
DPDK_CFLAGS+= -I${INIH}
UDPDK_C= ${CC} -c $(DPDK_CFLAGS) ${CFLAGS} ${WERROR} $<
UDPDK_CFLAGS+= -I${UDPDK_DPDK}/include
UDPDK_CFLAGS+= -I${INIH}
UDPDK_CFLAGS+= -I. -Ilist -Ishmalloc
UDPDK_SRCS+= \
UDPDK_C= ${CC} -c $(DPDK_CFLAGS) $(UDPDK_CFLAGS) ${CFLAGS} ${WERROR} $<
UDPDK_CORE_SRCS+= \
udpdk_args.c \
udpdk_globals.c \
udpdk_init.c \
udpdk_lookup_table.c \
udpdk_bind_table.c \
udpdk_poller.c \
udpdk_syscall.c \
UDPDK_LIST_SRCS+= \
list/udpdk_list.c \
list/udpdk_list_node.c \
list/udpdk_list_iterator.c \
list/udpdk_list_globals.c \
list/udpdk_list_init.c
UDPDK_SHM_SRCS+= \
shmalloc/udpdk_shmalloc.c \
SRCS= ${UDPDK_SRCS}
SRCS+= ${UDPDK_CORE_SRCS}
SRCS+= ${UDPDK_LIST_SRCS}
SRCS+= ${UDPDK_SHM_SRCS}
OBJS+= $(patsubst %.c,%.o,${SRCS})
......@@ -67,7 +80,7 @@ libudpdk.a: ${OBJS}
rm -f $*.ro
${OBJS}: %.o: %.c
${UDPDK_C}
${UDPDK_C} -o $@
.PHONY: clean
clean:
......@@ -75,31 +88,20 @@ clean:
rm -f ${OBJS} ${PROGRAM}
install:
rm -rf ${PREFIX_LIB}/libudpdk.a.${UDPDK_VERSION}
rm -rf ${PREFIX_LIB}/libudpdk.a
rm -rf ${PREFIX_INCLUDE}/udpdk_api.h
rm -rf ${PREFIX_INCLUDE}/udpdk_args.h
rm -rf ${PREFIX_INCLUDE}/udpdk_constants.h
rm -rf ${PREFIX_INCLUDE}/udpdk_poller.h
rm -rf ${PREFIX_INCLUDE}/udpdk_types.h
rm -rf ${PREFIX_INCLUDE}/udpdk_lookup_table.h
rm -f ${PREFIX_LIB}/libudpdk.a.${UDPDK_VERSION}
rm -f ${PREFIX_LIB}/libudpdk.a
rm -f ${PREFIX_INCLUDE}/udpdk_*.h
rm -f ${PREFIX_INCLUDE}/list/udpdk_*.h
rm -f ${PREFIX_INCLUDE}/shmalloc/udpdk_*.h
cp -f libudpdk.a ${PREFIX_LIB}/libudpdk.a.${UDPDK_VERSION}
ln -sf ${PREFIX_LIB}/libudpdk.a.${UDPDK_VERSION} ${PREFIX_LIB}/libudpdk.a
cp -f udpdk_api.h ${PREFIX_INCLUDE}/udpdk_api.h
cp -f udpdk_api.h ${PREFIX_INCLUDE}/udpdk_args.h
cp -f udpdk_constants.h ${PREFIX_INCLUDE}/udpdk_constants.h
cp -f udpdk_lookup_table.h ${PREFIX_INCLUDE}/udpdk_lookup_table.h
cp -f udpdk_poller.h ${PREFIX_INCLUDE}/udpdk_poller.h
cp -f udpdk_types.h ${PREFIX_INCLUDE}/udpdk_types.h
cp -f udpdk_*.h ${PREFIX_INCLUDE}/
cp -f list/udpdk_*.h ${PREFIX_INCLUDE}/
cp -f shmalloc/udpdk_*.h ${PREFIX_INCLUDE}/
uninstall:
rm -rf ${PREFIX_LIB}/libudpdk.a.${UDPDK_VERSION}
rm -rf ${PREFIX_LIB}/libudpdk.a
rm -rf ${PREFIX_INCLUDE}/udpdk_api.h
rm -rf ${PREFIX_INCLUDE}/udpdk_args.h
rm -rf ${PREFIX_INCLUDE}/udpdk_constants.h
rm -rf ${PREFIX_INCLUDE}/udpdk_poller.h
rm -rf ${PREFIX_INCLUDE}/udpdk_types.h
rm -rf ${PREFIX_INCLUDE}/udpdk_lookup_table.h
rm -f ${PREFIX_LIB}/libudpdk.a.${UDPDK_VERSION}
rm -f ${PREFIX_LIB}/libudpdk.a
rm -f ${PREFIX_INCLUDE}/udpdk_*.h
The list implementation is derived from: https://github.com/clibs/list
Its license is reproduced below.
---------------------------------------------------------------------
(The MIT License)
Copyright (c) 2009-2010 TJ Holowaychuk <tj@vision-media.ca>
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the 'Software'), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
// list.c
//
// Copyright (c) 2010 TJ Holowaychuk <tj@vision-media.ca>
//
#include "udpdk_list.h"
#include "udpdk_shmalloc.h"
extern const void *list_t_alloc;
extern const void *list_node_t_alloc;
/*
 * Create an empty list taken from the list_t allocator.
 * Returns the new list, or NULL when udpdk_shmalloc() cannot provide one.
 */
list_t *
list_new(void) {
  list_t *list = udpdk_shmalloc(list_t_alloc);

  if (list == NULL) {
    return NULL;
  }

  // Start empty, with no free/match callbacks installed
  list->len = 0;
  list->head = NULL;
  list->tail = NULL;
  list->free = NULL;
  list->match = NULL;
  return list;
}
/*
 * Release every node of the list, then the list itself.
 * When a free callback is installed it is invoked on each node's value.
 */
void
list_destroy(list_t *self) {
  list_node_t *node = self->head;

  for (unsigned int remaining = self->len; remaining > 0; remaining--) {
    // Read the successor before the current node is handed back
    list_node_t *following = node->next;

    if (self->free) {
      self->free(node->val);
    }
    udpdk_shfree(list_node_t_alloc, node);
    node = following;
  }

  udpdk_shfree(list_t_alloc, self);
}
/*
 * Link the given node at the tail of the list.
 * Returns the node, or NULL when node is NULL.
 */
list_node_t *
list_rpush(list_t *self, list_node_t *node) {
  if (node == NULL) {
    return NULL;
  }

  if (self->len == 0) {
    // First element: it is both head and tail
    self->head = self->tail = node;
    node->prev = node->next = NULL;
  } else {
    list_node_t *old_tail = self->tail;

    node->prev = old_tail;
    node->next = NULL;
    old_tail->next = node;
    self->tail = node;
  }

  self->len += 1;
  return node;
}
/*
 * Detach and return the last node of the list, or NULL if the list is empty.
 * The node is unlinked but not freed.
 */
list_node_t *
list_rpop(list_t *self) {
  if (self->len == 0) {
    return NULL;
  }

  list_node_t *popped = self->tail;

  self->len -= 1;
  if (self->len == 0) {
    self->head = NULL;
    self->tail = NULL;
  } else {
    self->tail = popped->prev;
    self->tail->next = NULL;
  }

  popped->prev = NULL;
  popped->next = NULL;
  return popped;
}
/*
 * Detach and return the first node of the list, or NULL if the list is empty.
 * The node is unlinked but not freed.
 */
list_node_t *
list_lpop(list_t *self) {
  if (self->len == 0) {
    return NULL;
  }

  list_node_t *popped = self->head;

  self->len -= 1;
  if (self->len == 0) {
    self->head = NULL;
    self->tail = NULL;
  } else {
    self->head = popped->next;
    self->head->prev = NULL;
  }

  popped->prev = NULL;
  popped->next = NULL;
  return popped;
}
/*
 * Link the given node at the head of the list.
 * Returns the node, or NULL when node is NULL.
 */
list_node_t *
list_lpush(list_t *self, list_node_t *node) {
  if (node == NULL) {
    return NULL;
  }

  if (self->len == 0) {
    // First element: it is both head and tail
    self->head = self->tail = node;
    node->prev = node->next = NULL;
  } else {
    list_node_t *old_head = self->head;

    node->next = old_head;
    node->prev = NULL;
    old_head->prev = node;
    self->head = node;
  }

  self->len += 1;
  return node;
}
/*
 * Insert the given node in the list at the 2nd position
 * (or first, if the list is empty)
 * and return the node, NULL on failure.
 */
list_node_t *
list_spush(list_t *self, list_node_t *node) {
  if (!node) return NULL;

  // Empty list: the node simply becomes the only element
  if (!self->len) {
    return list_lpush(self, node);
  }

  // Splice the node right after the current head
  node->next = self->head->next;
  node->prev = self->head;
  self->head->next = node;
  if (node->next) {
    node->next->prev = node;
  } else {
    // The head was the only element, so the new node is now the last one
    self->tail = node;
  }

  ++self->len;
  return node;
}
/*
 * Return the first node (scanning from the head) associated to val, or NULL.
 *
 * When a match callback is installed, a node matches if match(val, node->val)
 * returns non-zero; otherwise the value pointers are compared directly.
 *
 * The nodes are walked in place rather than through a list_iterator_t, so the
 * search does not depend on a free slot in the iterator allocator.
 */
list_node_t *
list_find(list_t *self, void *val) {
  for (list_node_t *node = self->head; node != NULL; node = node->next) {
    int found = self->match
        ? self->match(val, node->val)
        : (val == node->val);

    if (found) {
      return node;
    }
  }

  return NULL;
}
/*
 * Return the node at the given index or NULL.
 *
 * A non-negative index counts from the head (0 is the first node).
 * A negative index counts from the tail (-1 is the last node).
 * NULL is returned when the index is out of range.
 *
 * The nodes are walked in place rather than through a list_iterator_t, so the
 * lookup does not depend on a free slot in the iterator allocator.
 */
list_node_t *
list_at(list_t *self, int index) {
  int from_tail = (index < 0);

  if (from_tail) {
    index = ~index;  // -1 -> 0, -2 -> 1, ...
  }

  if ((unsigned)index >= self->len) {
    return NULL;
  }

  list_node_t *node = from_tail ? self->tail : self->head;
  while (index-- > 0 && node != NULL) {
    node = from_tail ? node->prev : node->next;
  }

  return node;
}
/*
 * Unlink the given node from the list and free it.
 * Its value is passed to the list's free callback, if one is installed.
 */
void
list_remove(list_t *self, list_node_t *node) {
  list_node_t *before = node->prev;
  list_node_t *after = node->next;

  // Bypass the node on the forward chain (or move the head)
  if (before) {
    before->next = after;
  } else {
    self->head = after;
  }

  // Bypass the node on the backward chain (or move the tail)
  if (after) {
    after->prev = before;
  } else {
    self->tail = before;
  }

  if (self->free) {
    self->free(node->val);
  }
  udpdk_shfree(list_node_t_alloc, node);
  self->len -= 1;
}
//
// list.h
//
// Copyright (c) 2010 TJ Holowaychuk <tj@vision-media.ca>
//
#ifndef __CLIBS_LIST_H__
#define __CLIBS_LIST_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <stdlib.h>
#include "udpdk_list_init.h"
// Library version
#define LIST_VERSION "0.0.5"
// Memory management macros
#ifdef LIST_CONFIG_H
#define _STR(x) #x
#define STR(x) _STR(x)
#include STR(LIST_CONFIG_H)
#undef _STR
#undef STR
#endif
/*
 * Iteration direction for a list_iterator_t:
 * LIST_HEAD starts at the head and follows the next pointers,
 * LIST_TAIL starts at the tail and follows the prev pointers.
 */
typedef enum {
LIST_HEAD
, LIST_TAIL
} list_direction_t;
/*
 * Node of the doubly-linked list. Created by list_node_new().
 */
typedef struct list_node {
struct list_node *prev;  // previous node (NULL for the first node)
struct list_node *next;  // next node (NULL for the last node)
void *val;               // caller-supplied value carried by the node
} list_node_t;
/*
 * Doubly-linked list descriptor. Created by list_new().
 */
typedef struct {
list_node_t *head;              // first node (NULL when empty)
list_node_t *tail;              // last node (NULL when empty)
unsigned int len;               // number of nodes currently linked
void (*free)(void *val);        // optional: called on a node's value by list_destroy/list_remove
int (*match)(void *a, void *b); // optional: used by list_find; non-zero means a match
} list_t;
/*
 * Iterator over a list. Created by list_iterator_new() or
 * list_iterator_new_from_node(), released by list_iterator_destroy().
 */
typedef struct {
list_node_t *next;           // node that the next call to list_iterator_next() returns
list_direction_t direction;  // LIST_HEAD walks forward, LIST_TAIL walks backward
} list_iterator_t;
// Node prototypes.
list_node_t *
list_node_new(void *val);
// list_t prototypes.
// Create an empty list (NULL on failure).
list_t *
list_new(void);
// Append node at the tail.
list_node_t *
list_rpush(list_t *self, list_node_t *node);
// Prepend node at the head.
list_node_t *
list_lpush(list_t *self, list_node_t *node);
// Insert node at the 2nd position (or first, if the list is empty).
list_node_t *
list_spush(list_t *self, list_node_t *node);
// Find the node associated to val.
list_node_t *
list_find(list_t *self, void *val);
// Get the node at index (negative index counts from the tail).
list_node_t *
list_at(list_t *self, int index);
// Detach and return the last node.
list_node_t *
list_rpop(list_t *self);
// Detach and return the first node.
list_node_t *
list_lpop(list_t *self);
// Unlink and free a node.
void
list_remove(list_t *self, list_node_t *node);
// Free all the nodes and the list.
void
list_destroy(list_t *self);
// list_t iterator prototypes.
list_iterator_t *
list_iterator_new(list_t *list, list_direction_t direction);
list_iterator_t *
list_iterator_new_from_node(list_node_t *node, list_direction_t direction);
list_node_t *
list_iterator_next(list_iterator_t *self);
void
list_iterator_destroy(list_iterator_t *self);
#ifdef __cplusplus
}
#endif
#endif /* __CLIBS_LIST_H__ */
#include "udpdk_list.h"
// Allocator handles for the three list object types.
// They start as NULL and are assigned by udpdk_list_init() or udpdk_list_reinit().
const void *list_t_alloc = NULL;           // pool of list_t
const void *list_node_t_alloc = NULL;      // pool of list_node_t
const void *list_iterator_t_alloc = NULL;  // pool of list_iterator_t
#include "udpdk_list.h"
#include "udpdk_shmalloc.h"
extern const void *list_t_alloc;
extern const void *list_node_t_alloc;
extern const void *list_iterator_t_alloc;
/* Create the allocators backing list_t, list_node_t and list_iterator_t objects */
void udpdk_list_init(void)
{
    // At most one list per UDP port
    list_t_alloc = udpdk_init_allocator(
            "list_t_alloc", UDP_MAX_PORT, sizeof(list_t));

    // Node pool sized on the maximum number of sockets
    list_node_t_alloc = udpdk_init_allocator(
            "list_node_t_alloc", NUM_SOCKETS_MAX, sizeof(list_node_t));

    // Small pool of iterators
    list_iterator_t_alloc = udpdk_init_allocator(
            "list_iterator_t_alloc", 10, sizeof(list_iterator_t));
}
/* Look up the list allocators by name.
 * Returns 0 on success, -1 as soon as one of them cannot be retrieved.
 */
int udpdk_list_reinit(void)
{
    if ((list_t_alloc = udpdk_retrieve_allocator("list_t_alloc")) == NULL)
        return -1;

    if ((list_node_t_alloc = udpdk_retrieve_allocator("list_node_t_alloc")) == NULL)
        return -1;

    if ((list_iterator_t_alloc = udpdk_retrieve_allocator("list_iterator_t_alloc")) == NULL)
        return -1;

    return 0;
}
/* Destroy the three list allocators */
void udpdk_list_deinit(void)
{
    const void *const allocators[] = {
        list_t_alloc,
        list_node_t_alloc,
        list_iterator_t_alloc,
    };

    // Same order as the original sequence of calls
    for (unsigned k = 0; k < sizeof(allocators) / sizeof(allocators[0]); k++) {
        udpdk_destroy_allocator(allocators[k]);
    }
}
//
// Created by leoll2 on 11/01/20.
// Copyright (c) 2020 Leonardo Lai. All rights reserved.
//
#ifndef UDPDK_LIST_INIT_H
#define UDPDK_LIST_INIT_H
#include "udpdk_constants.h"
#include "udpdk_types.h"
void udpdk_list_init(void);
int udpdk_list_reinit(void);
void udpdk_list_deinit(void);
#endif // UDPDK_LIST_INIT_H
//
// iterator.c
//
// Copyright (c) 2010 TJ Holowaychuk <tj@vision-media.ca>
//
#include "udpdk_list.h"
#include "udpdk_shmalloc.h"
// Allocator handle for list_iterator_t objects; the declaration is const-qualified
// to match the definition of this variable
extern const void *list_iterator_t_alloc;
/*
 * Create an iterator over the given list.
 * LIST_HEAD starts from the first node, any other direction from the last.
 * Returns NULL on failure.
 */
list_iterator_t *
list_iterator_new(list_t *list, list_direction_t direction) {
  list_node_t *start;

  if (direction == LIST_HEAD) {
    start = list->head;
  } else {
    start = list->tail;
  }

  return list_iterator_new_from_node(start, direction);
}
/*
 * Create an iterator whose first returned node is the given one.
 * Returns NULL when no iterator can be allocated.
 */
list_iterator_t *
list_iterator_new_from_node(list_node_t *node, list_direction_t direction) {
  list_iterator_t *it = udpdk_shmalloc(list_iterator_t_alloc);

  if (it != NULL) {
    it->next = node;
    it->direction = direction;
  }
  return it;
}
/*
 * Return the current node and advance the iterator in its direction.
 * Returns NULL once the end of the list has been reached.
 */
list_node_t *
list_iterator_next(list_iterator_t *self) {
  list_node_t *current = self->next;

  if (current == NULL) {
    return NULL;
  }

  if (self->direction == LIST_HEAD) {
    self->next = current->next;
  } else {
    self->next = current->prev;
  }
  return current;
}
/*
 * Give the iterator back to its allocator.
 * The caller's pointer is not modified and must not be used afterwards.
 */
void
list_iterator_destroy(list_iterator_t *self) {
  udpdk_shfree(list_iterator_t_alloc, self);
}
//
// node.c
//
// Copyright (c) 2010 TJ Holowaychuk <tj@vision-media.ca>
//
#include "udpdk_list.h"
#include "udpdk_shmalloc.h"
// Allocator handle for list_node_t objects; the declaration is const-qualified
// to match the definition of this variable
extern const void *list_node_t_alloc;
/*
 * Create an unlinked node carrying val.
 * Returns NULL when no node can be allocated.
 */
list_node_t *
list_node_new(void *val) {
  list_node_t *node = udpdk_shmalloc(list_node_t_alloc);

  if (node != NULL) {
    node->prev = NULL;
    node->next = NULL;
    node->val = val;
  }
  return node;
}
//
// Created by leoll2 on 11/01/20.
// Copyright (c) 2020 Leonardo Lai. All rights reserved.
//
#include <string.h>
#include <rte_common.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include "udpdk_shmalloc.h"
#define RTE_LOGTYPE_SHM RTE_LOGTYPE_USER1
// Bit k of a bitfield stored as an array of 32-bit words.
// Arguments are parenthesized and the mask is unsigned, so that expressions can
// be passed as k and bit 31 does not shift into the sign bit.
#define SetBit(A,k)     ((A)[((k) / 32)] |= (1u << ((k) % 32)))
#define ClearBit(A,k)   ((A)[((k) / 32)] &= ~(1u << ((k) % 32)))
#define TestBit(A,k)    ((A)[((k) / 32)] & (1u << ((k) % 32)))
// Bookkeeping header stored at the beginning of the allocator memzone.
// It is immediately followed by the bitfield of used elements (a set bit marks
// an allocated element), while the pool of elements starts at pool_offset.
struct allocator {
unsigned size;          // number of elements in the pool
unsigned elem_size;     // size (byte) of each element
unsigned n_free;        // number of elements currently free
unsigned next_free;     // index of next free
unsigned pool_offset;   // offset (bytes) from the begin of memzone
};
/* Create a fixed-size pool allocator inside a named memzone.
 *
 * Memzone layout: [struct allocator][bitfield of used elements][padding][pool]
 *
 * name:      name of the memzone (used later by udpdk_retrieve_allocator)
 * size:      number of elements in the pool
 * elem_size: size in bytes of one element (rounded up to a multiple of 64)
 *
 * Returns the memzone hosting the allocator, or NULL on failure.
 */
const struct rte_memzone *udpdk_init_allocator(const char *name, unsigned size, unsigned elem_size)
{
    const unsigned cache_line = 64;
    unsigned bitfield_bytes;
    unsigned p_off;
    unsigned mem_needed;
    const struct rte_memzone *mz;
    struct allocator *all;

    if (elem_size == 0) {
        return NULL;
    }

    // Round-up elem_size to cache line multiple (64 byte), keeping it in bytes
    elem_size = ((elem_size + cache_line - 1) / cache_line) * cache_line;

    // The bitfield is accessed as 32-bit words: reserve whole words, one bit per element
    bitfield_bytes = ((size + 31) / 32) * (unsigned)sizeof(int);

    // The pool starts after header and bitfield, at a multiple of elem_size (bytes)
    p_off = (unsigned)sizeof(struct allocator) + bitfield_bytes;
    p_off = ((p_off + elem_size - 1) / elem_size) * elem_size;

    // Total memory: header + bitfield (+ padding) + pool
    mem_needed = p_off + (size * elem_size);

    // Allocate the memory for the allocator and its pool
    mz = rte_memzone_reserve(name, mem_needed, rte_socket_id(), 0);
    if (mz == NULL) {
        return NULL;
    }

    // Initialize the allocator internal variables
    all = (struct allocator *)(void *)mz->addr;
    all->size = size;
    all->elem_size = elem_size;
    all->n_free = size;
    all->next_free = 0;
    all->pool_offset = p_off;

    // Mark all the elements as free
    memset(all + 1, 0, bitfield_bytes);

    return mz;
}
const struct rte_memzone *udpdk_retrieve_allocator(const char *name)
{
return rte_memzone_lookup(name);
}
/* Take one element from the pool of the given allocator.
 *
 * mz: memzone returned by udpdk_init_allocator / udpdk_retrieve_allocator
 *
 * Returns a pointer to the element, or NULL if the allocator is missing or
 * the pool has no free element left.
 */
void *udpdk_shmalloc(const struct rte_memzone *mz)
{
    struct allocator *all;
    int *free_bitfield;
    unsigned size;
    unsigned candidate;
    void *ret;

    if (mz == NULL) {
        RTE_LOG(WARNING, SHM, "shmalloc failed: allocator not initialized\n");
        return NULL;
    }

    all = (struct allocator *)(void *)mz->addr;
    free_bitfield = (int *)(all + 1);

    if (all->n_free == 0) {
        RTE_LOG(WARNING, SHM, "shmalloc failed: out of memory\n");
        return NULL;
    }

    size = all->size;

    // Compute and store the pointer to return
    ret = (char *)mz->addr + all->pool_offset + (all->next_free * all->elem_size);
    --all->n_free;

    // Update the free bitfield
    SetBit(free_bitfield, all->next_free);

    // Find the next free slot, probing circularly from the one just taken
    if (all->n_free != 0) {
        candidate = all->next_free;
        for (unsigned probed = 0; probed < size; probed++) {
            candidate = (candidate + 1 >= size) ? 0 : candidate + 1;
            if (!TestBit(free_bitfield, candidate)) {
                all->next_free = candidate;
                break;
            }
        }
    }
    return ret;
}
/* Give an element back to the pool of the given allocator.
 *
 * mz:   memzone of the allocator; it locates the pool and its boundaries
 * addr: address previously returned by udpdk_shmalloc for the same allocator
 *
 * Invalid requests (address outside the pool, element not allocated) are
 * logged and ignored.
 */
void udpdk_shfree(const struct rte_memzone *mz, void *addr)
{
    struct allocator *all;
    int *free_bitfield;
    unsigned i;
    char *pool_start;
    char *pool_end;

    if (mz == NULL) {
        RTE_LOG(WARNING, SHM, "shfree failed: allocator not initialized\n");
        return;
    }

    all = (struct allocator *)(void *)mz->addr;
    free_bitfield = (int *)(all + 1);

    // Validate the address
    pool_start = (char *)mz->addr + all->pool_offset;
    pool_end = pool_start + (all->size * all->elem_size);
    if (((char *)addr < pool_start) || ((char *)addr >= pool_end)) {
        RTE_LOG(WARNING, SHM, "Invalid free: address outside the pool\n");
        return;
    }

    // Check if the memory was really allocated
    i = ((char *)addr - pool_start) / all->elem_size;
    if (!TestBit(free_bitfield, i)) {
        RTE_LOG(WARNING, SHM, "Double free\n");
        return;
    }

    // Free
    ClearBit(free_bitfield, i);
    ++all->n_free;

    // If previously full, next_free was stale: the freed slot is the only free one
    if (all->n_free == 1) {
        all->next_free = i;
    }
}
/* Destroy an allocator, releasing its memzone.
 *
 * mz: memzone of the allocator; NULL (allocator never created) is ignored.
 *
 * A warning is logged when some elements are still allocated.
 */
void udpdk_destroy_allocator(const struct rte_memzone *mz)
{
    const struct allocator *all;

    if (mz == NULL) {
        return;
    }

    all = (struct allocator *)(void *)mz->addr;
    if (all->n_free != all->size) {
        RTE_LOG(WARNING, SHM, "Destroying shm allocator before all the elements were freed!\n");
    }

    if (rte_memzone_free(mz) != 0) {
        RTE_LOG(WARNING, SHM, "Failed to free the memzone of a shm allocator\n");
    }
}
//
// Created by leoll2 on 11/01/20.
// Copyright (c) 2020 Leonardo Lai. All rights reserved.
//
#ifndef UDPDK_SHMALLOC_H
#define UDPDK_SHMALLOC_H
#include <rte_compat.h>
#include <rte_memory.h>
#include <rte_common.h>
const struct rte_memzone *udpdk_init_allocator(const char *name, unsigned size, unsigned elem_size);
const struct rte_memzone *udpdk_retrieve_allocator(const char *name);
void *udpdk_shmalloc(const struct rte_memzone *mz);
void udpdk_shfree(const struct rte_memzone *mz, void *addr);
void udpdk_destroy_allocator(const struct rte_memzone *mz);
#endif // UDPDK_SHMALLOC_H
......@@ -24,6 +24,12 @@ void udpdk_cleanup(void);
int udpdk_socket(int domain, int type, int protocol);
int udpdk_getsockopt(int sockfd, int level, int optname, void *optval,
socklen_t *optlen);
int udpdk_setsockopt(int sockfd, int level, int optname,
const void *optval, socklen_t optlen);
int udpdk_bind(int s, const struct sockaddr *addr, socklen_t addrlen);
ssize_t udpdk_sendto(int sockfd, const void *buf, size_t len, int flags,
......
......@@ -2,7 +2,9 @@ udpdk_init
udpdk_interrupt
udpdk_cleanup
udpdk_socket
udpdk_getsockopt
udpdk_setsockopt
udpdk_bind
udpdk_sendto
udpdk_recvfrom
udpdk_close
\ No newline at end of file
udpdk_close
//
// Created by leoll2 on 9/27/20.
// Copyright (c) 2020 Leonardo Lai. All rights reserved.
//
// Data structure to hold (ip, port) pairs of bound sockets
// It is an array of UDP_MAX_PORT lists (one per port); each list contains all
// the IPs bound to that port (typically one, but can be many)
//
#include <arpa/inet.h> // inet_ntop
#include <netinet/in.h> // INADDR_ANY
#include "udpdk_bind_table.h"
#include "udpdk_shmalloc.h"
#define RTE_LOGTYPE_BTABLE RTE_LOGTYPE_USER1
const void *bind_info_alloc = NULL;
list_t **sock_bind_table;
/* Set up the bindings table: create the bind_info allocator and mark every port as unbound */
void btable_init(void)
{
    unsigned port = 0;

    // Pool of bind_info descriptors
    bind_info_alloc = udpdk_init_allocator("bind_info_alloc", NUM_SOCKETS_MAX, sizeof(struct bind_info));

    // A NULL entry means that no socket is bound to that port
    while (port < UDP_MAX_PORT) {
        sock_bind_table[port] = NULL;
        port++;
    }
}
/* Return the lowest table index with no bindings, or -1 (with a warning) if every port is in use */
int btable_get_free_port(void)
{
    unsigned port = 0;

    while (port < UDP_MAX_PORT) {
        if (sock_bind_table[port] == NULL) {
            return port;
        }
        port++;
    }

    RTE_LOG(WARNING, BTABLE, "Failed to find a free port\n");
    return -1;
}
/* Verify if binding the pair (ip, port) is possible, provided the
* options and the previous bindings.
*/
static inline bool btable_can_bind(struct in_addr ip, int port, int opts)
{
bool reuse_addr = opts & SO_REUSEADDR;
bool reuse_port = opts & SO_REUSEPORT;
bool can_bind = true;
list_iterator_t *it;
list_node_t *node;
unsigned long ip_oth, ip_new;
// bool oth_reuseaddr;
bool oth_reuseport;
if (sock_bind_table[port] == NULL) {
return true;
}
ip_new = ip.s_addr;
it = list_iterator_new(sock_bind_table[port], LIST_HEAD);
while ((node = list_iterator_next(it))) {
ip_oth = ((struct bind_info *)(node->val))->ip_addr.s_addr;
// oth_reuseaddr = ((struct bind_info *)(node->val))->reuse_addr;
oth_reuseport = ((struct bind_info *)(node->val))->reuse_port;
// If different, and none is INADDR_ANY, continue
if ((ip_oth != ip_new) && (ip_oth != INADDR_ANY) && (ip_new != INADDR_ANY)) {
continue;
}
// If different, one is INADDR_ANY, and the new has SO_REUSEADDR or SO_REUSEPORT, continue
if ((ip_oth != ip_new) && ((ip_oth == INADDR_ANY) || (ip_new != INADDR_ANY))
&& ((opts & SO_REUSEADDR) || (opts & SO_REUSEPORT))) {
continue;
}
// If same, not INADDR_ANY and both have SO_REUSEPORT, continue
if ((ip_oth == ip_new) && (ip_new != INADDR_ANY)
&& (opts & SO_REUSEPORT) && oth_reuseport) {
continue;
}
can_bind = false;
break;
}
list_iterator_destroy(it);
return can_bind;
}
int btable_add_binding(int s, struct in_addr ip, int port, int opts)
{
struct bind_info *b;
list_node_t *ln;
// Check if binding this pair is allowed
if (!btable_can_bind(ip, port, opts)) {
char buf[INET_ADDRSTRLEN];
inet_ntop(AF_INET, &ip, buf, sizeof(buf));
RTE_LOG(WARNING, BTABLE, "Cannot bind socket %d to %s:%d\n", s, buf, ntohs(port));
return -1;
}
// Allocate the list if missing
if (sock_bind_table[port] == NULL) {
sock_bind_table[port] = list_new();
}
// Allocate and setup a new bind_info element
b = (struct bind_info *)udpdk_shmalloc(bind_info_alloc);
b->sockfd = s;
b->ip_addr = ip;
b->reuse_addr = opts & SO_REUSEADDR;
b->reuse_port = opts & SO_REUSEPORT;
b->closed = false;
// Insert the bind_info in the list
ln = list_node_new(b);
if (ip.s_addr == INADDR_ANY) {
list_lpush(sock_bind_table[port], ln);
} else {
list_rpush(sock_bind_table[port], ln);
}
return 0;
}
/* Remove a binding from the port */
void btable_del_binding(int s, int port) {
list_node_t *node;
list_iterator_t *it;
// Remove the binding from the lsit
it = list_iterator_new(sock_bind_table[port], LIST_HEAD);
while ((node = list_iterator_next(it))) {
if (((struct bind_info *)(node->val))->sockfd == s) {
udpdk_shfree(bind_info_alloc, node->val);
list_remove(sock_bind_table[port], node);
break;
}
}
list_iterator_destroy(it);
// If no more bindings left, free the port
if (sock_bind_table[port]->len == 0) {
list_destroy(sock_bind_table[port]);
sock_bind_table[port] = NULL;
}
}
/* Return the list of bind_info descriptors bound to the given port (NULL if the port has none) */
list_t *btable_get_bindings(int port) {
    list_t *bindings = sock_bind_table[port];

    return bindings;
}
/* Tear down the bindings table by destroying the bind_info allocator */
void btable_destroy(void)
{
    const void *allocator = bind_info_alloc;

    udpdk_destroy_allocator(allocator);
}
//
// Created by leoll2 on 9/28/20.
// Copyright (c) 2020 Leonardo Lai. All rights reserved.
//
#ifndef UDPDK_BIND_TABLE_H
#define UDPDK_BIND_TABLE_H
#include "udpdk_constants.h"
#include "udpdk_types.h"
void btable_init(void);
int btable_get_free_port(void);
int btable_add_binding(int s, struct in_addr ip, int port, int opts);
void btable_del_binding(int s, int port);
list_t *btable_get_bindings(int port);
void btable_destroy(void);
#endif //UDPDK_BIND_TABLE_H
......@@ -10,6 +10,7 @@
#define MIN(a,b) ((a) < (b) ? a : b)
#define NUM_SOCKETS_MAX 1024
#define UDP_MAX_PORT 65536
/* DPDK ports */
#define PORT_RX 0
......@@ -51,7 +52,7 @@
#define EXCH_BUF_SIZE 32
/* L4 port switching */
#define UDP_PORT_TABLE_NAME "UDPDK_UDP_port_table"
#define UDP_BIND_TABLE_NAME "UDPDK_btable"
/* IPv4 header */
#define IP_DEFTTL 64
......
......@@ -4,7 +4,6 @@
//
#include "udpdk_constants.h"
#include "udpdk_lookup_table.h"
#include "udpdk_types.h"
volatile int interrupted = 0;
......@@ -27,8 +26,6 @@ struct rte_mempool *tx_pktmbuf_direct_pool = NULL;
struct rte_mempool *tx_pktmbuf_indirect_pool = NULL;
htable_item *udp_port_table = NULL;
struct exch_zone_info *exch_zone_desc = NULL;
struct exch_slot *exch_slots = NULL;
\ No newline at end of file
struct exch_slot *exch_slots = NULL;
......@@ -20,10 +20,11 @@
#include <rte_memory.h>
#include <rte_memzone.h>
#include "udpdk_list.h"
#include "udpdk_api.h"
#include "udpdk_args.h"
#include "udpdk_constants.h"
#include "udpdk_lookup_table.h"
#include "udpdk_bind_table.h"
#include "udpdk_poller.h"
#include "udpdk_types.h"
......@@ -34,11 +35,11 @@
extern int interrupted;
extern struct exch_zone_info *exch_zone_desc;
extern struct exch_slot *exch_slots;
extern htable_item *udp_port_table;
extern struct rte_mempool *rx_pktmbuf_pool;
extern struct rte_mempool *tx_pktmbuf_pool;
extern struct rte_mempool *tx_pktmbuf_direct_pool;
extern struct rte_mempool *tx_pktmbuf_indirect_pool;
extern list_t **sock_bind_table;
extern int primary_argc;
extern int secondary_argc;
extern char *primary_argv[MAX_ARGC];
......@@ -222,7 +223,7 @@ static void check_port_link_status(uint16_t portid) {
}
/* Initialize a shared memory region to contain descriptors for the exchange slots */
static int init_shared_memzone(void)
static int init_exch_memzone(void)
{
const struct rte_memzone *mz;
......@@ -237,21 +238,40 @@ static int init_shared_memzone(void)
return 0;
}
/* Initialize table in shared memory for UDP port switching */
static int init_udp_table(void)
static int destroy_exch_memzone(void)
{
const struct rte_memzone *mz;
mz = rte_memzone_reserve(UDP_PORT_TABLE_NAME, NUM_SOCKETS_MAX * sizeof(htable_item), rte_socket_id(), 0);
mz = rte_memzone_lookup(EXCH_MEMZONE_NAME);
return rte_memzone_free(mz);
}
/* Initialize a shared memory region to store the L4 switching table */
static int init_udp_bind_table(void)
{
const struct rte_memzone *mz;
mz = rte_memzone_reserve(UDP_BIND_TABLE_NAME, UDP_MAX_PORT * sizeof(struct list_t *), rte_socket_id(), 0);
if (mz == NULL) {
RTE_LOG(ERR, INIT, "Cannot allocate shared memory for UDP port switching table\n");
RTE_LOG(ERR, INIT, "Cannot allocate shared memory for L4 switching table\n");
return -1;
}
udp_port_table = mz->addr;
htable_init(udp_port_table);
sock_bind_table = mz->addr;
btable_init();
return 0;
}
/* Tear down the L4 switching table: destroy the bindings table state, then
 * free its memzone. Returns the result of rte_memzone_free(). */
static int destroy_udp_bind_table(void)
{
    btable_destroy();

    const struct rte_memzone *table_mz = rte_memzone_lookup(UDP_BIND_TABLE_NAME);
    int rc = rte_memzone_free(table_mz);

    return rc;
}
/* Initialize slots to exchange packets between the application and the poller */
static int init_exchange_slots(void)
{
......@@ -303,6 +323,9 @@ int udpdk_init(int argc, char *argv[])
return -1;
}
// Initialize the list allocators
udpdk_list_init();
// Initialize pools of mbuf
retval = init_mbuf_pools();
if (retval < 0) {
......@@ -330,13 +353,13 @@ int udpdk_init(int argc, char *argv[])
}
// Initialize memzone for exchange
retval = init_shared_memzone();
retval = init_exch_memzone();
if (retval < 0) {
RTE_LOG(ERR, INIT, "Cannot initialize memzone for exchange zone descriptors\n");
return -1;
}
retval = init_udp_table();
retval = init_udp_bind_table();
if (retval < 0) {
RTE_LOG(ERR, INIT, "Cannot create table for UDP port switching\n");
return -1;
......@@ -365,6 +388,16 @@ void udpdk_interrupt(int signum)
interrupted = 1;
}
/* Close every socket whose exchange slot is still marked as bound */
static void udpdk_close_all_sockets(void)
{
    int s;

    for (s = 0; s < NUM_SOCKETS_MAX; s++) {
        if (!exch_zone_desc->slots[s].bound) {
            continue;
        }
        RTE_LOG(INFO, CLOSE, "Closing socket %d that was left open\n", s);
        udpdk_close(s);
    }
}
void udpdk_cleanup(void)
{
uint16_t port_id;
......@@ -385,4 +418,16 @@ void udpdk_cleanup(void)
rte_eth_dev_stop(port_id);
rte_eth_dev_close(port_id);
}
}
\ No newline at end of file
// Close all open sockets
udpdk_close_all_sockets();
// Free the memory of L4 switching table
destroy_udp_bind_table();
// Free the memory for exch zone
destroy_exch_memzone();
// Release linked-list memory allocators
udpdk_list_deinit();
}
//
// Created by leoll2 on 9/27/20.
// Copyright (c) 2020 Leonardo Lai. All rights reserved.
//
// Simple hashmap with fixed size, useful to implement L4 port switching.
// Keys and values must be non-negative integers (-1 is reserved for 'empty')
//
#include "udpdk_lookup_table.h"
// Hash of a key into a table index; the argument is parenthesized so that
// expressions can be passed safely
#define h(x) (9649 * (x) % NUM_SOCKETS_MAX)
/* Mark every slot of the table as empty (key -1) */
void htable_init(htable_item *table)
{
    htable_item *const end = table + NUM_SOCKETS_MAX;

    for (htable_item *slot = table; slot != end; ++slot) {
        slot->key = -1;
    }
}
/* Locate the slot for a key using linear probing from its hashed index.
 *
 * Returns:
 *  - the index of the slot holding the key, if present
 *  - otherwise the index of the first free slot met while probing
 *  - -1 if the key is absent and the table has no free slot
 *
 * Every slot is examined, including the last probed one, so a table with a
 * single free slot is never reported as full.
 * The key is expected to be non-negative (the hash is used as array index).
 */
static inline int htable_get_idx(htable_item *table, int key)
{
    int i = h(key);
    int free_idx = -1;

    for (int scanned = 0; scanned < NUM_SOCKETS_MAX; scanned++) {
        if (table[i].key == key) {
            return i;
        }
        if (free_idx == -1 && table[i].key == -1) {
            // store the first free index
            free_idx = i;
        }
        // advance circularly
        i++;
        if (i == NUM_SOCKETS_MAX) {
            i = 0;
        }
    }

    // key not found: first free slot, or -1 if the table is full
    return free_idx;
}
/* Store val under key, overwriting a previous value for the same key.
 * Returns 0 on success, -1 if no slot is available. */
inline int htable_insert(htable_item *table, int key, int val)
{
    const int slot = htable_get_idx(table, key);

    if (slot != -1) {
        table[slot].key = key;
        table[slot].val = val;
        return 0;
    }

    // full
    return -1;
}
/* Remove key from the table.
 * Returns 0 if the key was removed, -1 if it was not found. */
inline int htable_delete(htable_item *table, int key)
{
    const int slot = htable_get_idx(table, key);

    // not found: either no slot at all, or only a free slot was returned
    if (slot == -1 || table[slot].key == -1) {
        return -1;
    }

    // remove
    table[slot].key = -1;
    return 0;
}
/* Return the value stored under key, or -1 if the key is not in the table */
inline int htable_lookup(htable_item *table, int key)
{
    const int slot = htable_get_idx(table, key);
    const int missing = (slot == -1) || (table[slot].key == -1);

    return missing ? -1 : table[slot].val;
}
//
// Created by leoll2 on 9/28/20.
// Copyright (c) 2020 Leonardo Lai. All rights reserved.
//
#ifndef UDPDK_LOOKUP_TABLE_H
#define UDPDK_LOOKUP_TABLE_H
#include "udpdk_constants.h"
// One slot of the fixed-size hash table
typedef struct htable_item {
int key;  // key stored in the slot; -1 marks the slot as empty
int val;  // value associated to the key
} htable_item;
void htable_init(htable_item *table);
int htable_insert(htable_item *table, int key, int val);
int htable_lookup(htable_item *table, int key);
int htable_delete(htable_item *table, int key);
#endif //UDPDK_LOOKUP_TABLE_H
......@@ -25,7 +25,8 @@
#include <rte_string_fns.h>
#include "udpdk_constants.h"
#include "udpdk_lookup_table.h"
#include "udpdk_bind_table.h"
#include "udpdk_shmalloc.h"
#include "udpdk_types.h"
#define RTE_LOGTYPE_POLLBODY RTE_LOGTYPE_USER1
......@@ -36,7 +37,8 @@ static volatile int poller_alive = 1;
extern struct exch_zone_info *exch_zone_desc;
extern struct exch_slot *exch_slots;
extern htable_item *udp_port_table;
extern list_t **sock_bind_table;
extern const void *bind_info_alloc;
/* Descriptor of a RX queue */
struct rx_queue {
......@@ -82,6 +84,22 @@ static inline const char * get_exch_ring_name(unsigned id, enum exch_ring_func f
return buffer;
}
/*
 * Retrieve the shared-memory allocators used by the poller.
 *
 * Looks up the "bind_info_alloc" allocator and stores it in the global
 * bind_info_alloc, then re-initializes the list allocators through
 * udpdk_list_reinit().
 *
 * Returns 0 on success, -1 if either step fails (the failure is logged).
 */
static int setup_allocators(void)
{
    int status = 0;

    bind_info_alloc = udpdk_retrieve_allocator("bind_info_alloc");

    if (bind_info_alloc == NULL) {
        RTE_LOG(ERR, POLLINIT, "Cannot retrieve bind_info shmem allocator\n");
        status = -1;
    } else if (udpdk_list_reinit() < 0) {
        RTE_LOG(ERR, POLLINIT, "Cannot retrieve list shmem allocators\n");
        status = -1;
    }

    return status;
}
/* Initialize the queues for this lcore */
static int setup_queues(void)
{
......@@ -170,14 +188,14 @@ static int setup_exch_zone(void)
/*
 * Look up the shared memzone that holds the L4 switching table and store its
 * address in a global pointer. Returns 0 on success, -1 if the lookup fails.
 *
 * NOTE(review): as rendered here, this span appears to interleave the lines
 * from before the change (udp_port_table / UDP_PORT_TABLE_NAME) with the lines
 * from after it (sock_bind_table / UDP_BIND_TABLE_NAME); the first `if` is
 * never closed. Confirm against the actual file which set of lines is live.
 */
static int setup_udp_table(void)
{
const struct rte_memzone *udp_port_table_mz;
const struct rte_memzone *sock_bind_table_mz;
udp_port_table_mz = rte_memzone_lookup(UDP_PORT_TABLE_NAME);
if (udp_port_table_mz == NULL) {
RTE_LOG(ERR, POLLINIT, "Cannot retrieve exchange memzone descriptor\n");
sock_bind_table_mz = rte_memzone_lookup(UDP_BIND_TABLE_NAME);
if (sock_bind_table_mz == NULL) {
RTE_LOG(ERR, POLLINIT, "Cannot retrieve L4 switching table memory\n");
return -1;
}
udp_port_table = udp_port_table_mz->addr;
sock_bind_table = sock_bind_table_mz->addr;
return 0;
}
......@@ -194,6 +212,13 @@ int poller_init(int argc, char *argv[])
return -1;
}
// Setup memory allocators
retval = setup_allocators();
if (retval < 0) {
RTE_LOG(ERR, POLLINIT, "Cannot setup allocators for poller\n");
return -1;
}
// Setup RX/TX queues
retval = setup_queues();
if (retval < 0) {
......@@ -258,6 +283,11 @@ static inline uint16_t get_udp_dst_port(struct rte_udp_hdr *udp_hdr)
return udp_hdr->dst_port;
}
/*
 * Return the destination address field of an IPv4 header, widened to
 * unsigned long. The field is returned as stored in the header; no byte-order
 * conversion is applied.
 */
static inline unsigned long get_ipv4_dst_addr(struct rte_ipv4_hdr *ip_hdr)
{
    const unsigned long dst = ip_hdr->dst_addr;

    return dst;
}
static inline void reassemble(struct rte_mbuf *m, uint16_t portid, uint32_t queue,
struct lcore_queue_conf *qconf, uint64_t tms)
{
......@@ -267,7 +297,10 @@ static inline void reassemble(struct rte_mbuf *m, uint16_t portid, uint32_t queu
struct rte_ip_frag_death_row *dr;
struct rx_queue *rxq;
uint16_t udp_dst_port;
unsigned long ip_dst_addr;
int sock_id;
bool delivered_once = false;
bool delivered_last = false;
rxq = &qconf->rx_queue;
......@@ -310,17 +343,47 @@ static inline void reassemble(struct rte_mbuf *m, uint16_t portid, uint32_t queu
RTE_LOG(WARNING, POLLBODY, "Received non-UDP packet.\n");
return;
}
udp_dst_port = get_udp_dst_port(
(struct rte_udp_hdr *)((unsigned char *)ip_hdr + sizeof(struct rte_ipv4_hdr)));
// Find the sock_id corresponding to the UDP dst port (L4 switching) and enqueue the packet to its queue
sock_id = htable_lookup(udp_port_table, udp_dst_port);
if (sock_id < 0 || sock_id >= NUM_SOCKETS_MAX) {
errno = EINVAL;
RTE_LOG(ERR, POLLBODY, "Invalid L4 port mapping: port %d maps to sock_id %d\n", udp_dst_port, sock_id);
udp_dst_port = get_udp_dst_port((struct rte_udp_hdr *)(ip_hdr + 1));
ip_dst_addr = get_ipv4_dst_addr(ip_hdr);
// Find the sock_ids corresponding to the UDP dst port (L4 switching) and enqueue the packet to its queue
list_t *binds = btable_get_bindings(udp_dst_port);
if (binds == NULL) {
RTE_LOG(WARNING, POLLBODY, "Dropping packet for port %d: no socket bound\n", ntohs(udp_dst_port));
return;
}
enqueue_rx_packet(sock_id, m);
list_iterator_t *it = list_iterator_new(binds, LIST_HEAD);
list_node_t *node;
while ((node = list_iterator_next(it))) {
unsigned long ip_oth = ((struct bind_info *)(node->val))->ip_addr.s_addr;
bool oth_reuseaddr = ((struct bind_info *)(node->val))->reuse_addr;
bool oth_reuseport = ((struct bind_info *)(node->val))->reuse_port;
// TODO the semantic should be more complex actually:
// if dest unicast and SO_REUSEPORT, should load balance
// if dest broadcast and SO_REUSEADDR or SO_REUSEPORT, should deliver to all
// If matching
if (likely((ip_dst_addr == ip_oth) || (ip_oth == INADDR_ANY))) {
// Deliver to this socket
enqueue_rx_packet(((struct bind_info *)(node->val))->sockfd, m);
delivered_once = true;
// If other socket may exist on the same port, keep scanning
if (oth_reuseaddr || oth_reuseport) {
m = rte_pktmbuf_clone(m, rxq->pool);
delivered_last = false;
continue;
} else {
delivered_last = true;
break;
}
}
}
if (!delivered_last) {
rte_pktmbuf_free(m);
}
if (!delivered_once) {
RTE_LOG(WARNING, POLLBODY, "Dropped packet to port %d: no socket matching\n", ntohs(udp_dst_port));
}
list_iterator_destroy(it);
}
static inline void flush_tx_table(struct rte_mbuf **tx_mbuf_table, uint16_t tx_count)
......
......@@ -10,13 +10,12 @@
#include <rte_random.h>
#include "udpdk_api.h"
#include "udpdk_lookup_table.h"
#include "udpdk_bind_table.h"
#define RTE_LOGTYPE_SYSCALL RTE_LOGTYPE_USER1
extern int interrupted;
extern configuration config;
extern htable_item *udp_port_table;
extern struct exch_zone_info *exch_zone_desc;
extern struct exch_slot *exch_slots;
extern struct rte_mempool *tx_pktmbuf_pool;
......@@ -65,6 +64,7 @@ int udpdk_socket(int domain, int type, int protocol)
exch_zone_desc->slots[sock_id].used = 1;
exch_zone_desc->slots[sock_id].bound = 0;
exch_zone_desc->slots[sock_id].sockfd = sock_id;
exch_zone_desc->slots[sock_id].so_options = 0;
break;
}
}
......@@ -80,6 +80,117 @@ int udpdk_socket(int domain, int type, int protocol)
return sock_id;
}
static int getsetsockopt_validate_args(int sockfd, int level, int optname,
const void *optval, socklen_t *optlen)
{
// Check if the sockfd is valid
if (!exch_zone_desc->slots[sockfd].used) {
errno = EBADF;
RTE_LOG(ERR, SYSCALL, "Invalid socket descriptor (%d)\n", sockfd);
return -1;
}
// Check that level is supported
if (level != SOL_SOCKET) {
RTE_LOG(ERR, SYSCALL, "Level %d does not exist or is unsupported\n", level);
errno = EINVAL;
return -1;
}
// Check if option is supported
switch (optname) {
case SO_REUSEADDR:
break;
case SO_REUSEPORT:
break;
default:
errno = ENOPROTOOPT;
RTE_LOG(ERR, SYSCALL, "Invalid or unsupported option %d at level %d\n", optname, level);
return -1;
}
// Check that optval and optlen are not NULL
if (optval == NULL || optlen == NULL) {
errno = EFAULT;
RTE_LOG(ERR, SYSCALL, "optval and optlen cannot be NULL\n");
return -1;
}
return 0;
}
/*
 * Read a socket option (only SOL_SOCKET / SO_REUSEADDR and SO_REUSEPORT).
 *
 * sockfd   socket descriptor (index of the exchange slot)
 * level    must be SOL_SOCKET
 * optname  SO_REUSEADDR or SO_REUSEPORT
 * optval   receives an int: 1 if the option is set, 0 otherwise
 * optlen   in: size of the buffer at optval; out: number of bytes written
 *
 * Returns 0 on success, -1 with errno set on failure.
 *
 * The option is reported as set only if ALL bits of its constant are present
 * in so_options. The option constants are stored directly as a mask and are
 * not guaranteed to be disjoint bits (on Linux SO_REUSEADDR is 2 and
 * SO_REUSEPORT is 15), so a plain "any bit set" test would report
 * SO_REUSEPORT whenever SO_REUSEADDR is stored.
 * NOTE(review): with overlapping constants SO_REUSEADDR is still reported as
 * set whenever SO_REUSEPORT is stored; fixing that needs a dedicated flag
 * encoding shared with every reader of so_options.
 */
int udpdk_getsockopt(int sockfd, int level, int optname, void *optval, socklen_t *optlen)
{
    int mask;
    int value;
    socklen_t out_len;

    // Validate the arguments
    if (getsetsockopt_validate_args(sockfd, level, optname, optval, optlen) < 0) {
        return -1;
    }

    // Select the mask for the requested option
    switch (level) {
    case SOL_SOCKET:
        switch (optname) {
        case SO_REUSEADDR:
        case SO_REUSEPORT:
            mask = optname;
            break;
        default:
            RTE_LOG(ERR, SYSCALL, "Invalid or unsupported option %d at level %d\n", optname, level);
            errno = ENOPROTOOPT;
            return -1;
        }
        break;
    default:
        RTE_LOG(ERR, SYSCALL, "Level %d does not exist or is unsupported\n", level);
        errno = EINVAL;
        return -1;
    }

    value = ((exch_zone_desc->slots[sockfd].so_options & mask) == mask);

    // Never write past the caller's buffer; report how many bytes were stored
    out_len = (*optlen < (socklen_t)sizeof(value)) ? *optlen : (socklen_t)sizeof(value);
    memcpy(optval, &value, out_len);
    *optlen = out_len;

    return 0;
}
/*
 * Set or clear a socket option (only SOL_SOCKET / SO_REUSEADDR and SO_REUSEPORT).
 *
 * sockfd   socket descriptor (index of the exchange slot)
 * level    must be SOL_SOCKET
 * optname  SO_REUSEADDR or SO_REUSEPORT
 * optval   points to an int: non-zero enables the option, zero disables it
 * optlen   size of the value at optval; must be at least sizeof(int)
 *
 * Returns 0 on success, -1 with errno set on failure (EINVAL if optlen is too
 * small, plus the errors reported by the shared argument validation).
 *
 * The option constant itself is OR-ed into / masked out of so_options.
 * The update is applied unconditionally: testing "is any bit already set"
 * first would skip enabling one option when another option sharing bits with
 * it is already stored (on Linux SO_REUSEADDR is 2 and SO_REUSEPORT is 15).
 * NOTE(review): for the same reason clearing SO_REUSEPORT also clears the
 * SO_REUSEADDR bit on Linux; a dedicated flag encoding shared with every
 * reader of so_options would be needed to fix that.
 */
int udpdk_setsockopt(int sockfd, int level, int optname, const void *optval, socklen_t optlen)
{
    int enable;

    // Validate the arguments
    if (getsetsockopt_validate_args(sockfd, level, optname, optval, &optlen) < 0) {
        return -1;
    }

    // The value is read as an int: refuse shorter buffers instead of over-reading
    if (optlen < (socklen_t)sizeof(enable)) {
        RTE_LOG(ERR, SYSCALL, "Option value too short (%u bytes)\n", (unsigned)optlen);
        errno = EINVAL;
        return -1;
    }
    memcpy(&enable, optval, sizeof(enable));

    // Handle the request
    switch (level) {
    case SOL_SOCKET:
        switch (optname) {
        case SO_REUSEADDR:
        case SO_REUSEPORT:
            if (enable != 0) {
                exch_zone_desc->slots[sockfd].so_options |= optname;    // set
            } else {
                exch_zone_desc->slots[sockfd].so_options &= ~optname;   // reset
            }
            break;
        default:
            RTE_LOG(ERR, SYSCALL, "Invalid or unsupported option %d at level %d\n", optname, level);
            errno = ENOPROTOOPT;
            return -1;
        }
        break;
    default:
        RTE_LOG(ERR, SYSCALL, "Level %d does not exist or is unsupported\n", level);
        errno = EINVAL;
        return -1;
    }

    return 0;
}
static int bind_validate_args(int sockfd, const struct sockaddr *addr, socklen_t addrlen)
{
// Check if the sockfd is valid
......@@ -107,7 +218,6 @@ static int bind_validate_args(int sockfd, const struct sockaddr *addr, socklen_t
int udpdk_bind(int sockfd, const struct sockaddr *addr, socklen_t addrlen)
{
int ret;
unsigned short port;
const struct sockaddr_in *addr_in = (struct sockaddr_in *)addr;
......@@ -116,29 +226,20 @@ int udpdk_bind(int sockfd, const struct sockaddr *addr, socklen_t addrlen)
return -1;
}
// Check if the port is already being used
// Try to bind the socket
port = addr_in->sin_port;
ret = htable_lookup(udp_port_table, port);
if (ret != -1) {
errno = EINVAL;
RTE_LOG(ERR, SYSCALL, "Failed to bind because port %d is already in use\n", port);
if (btable_add_binding(sockfd, addr_in->sin_addr, port, exch_zone_desc->slots[sockfd].so_options) < 0) {
errno = EADDRINUSE;
RTE_LOG(ERR, SYSCALL, "Failed to bind because port %d is already in use\n", ntohs(port));
return -1;
}
// Mark the slot as bound, and store the corresponding IP and port
exch_zone_desc->slots[sockfd].bound = 1;
exch_zone_desc->slots[sockfd].udp_port = (int)port;
if (addr_in->sin_addr.s_addr == INADDR_ANY) {
// If INADDR_ANY, use the address from the configuration file
exch_zone_desc->slots[sockfd].ip_addr = config.src_ip_addr;
} else {
// If the address is explicitly set, bind to that
exch_zone_desc->slots[sockfd].ip_addr = addr_in->sin_addr;
}
exch_zone_desc->slots[sockfd].ip_addr = addr_in->sin_addr;
// Insert in the hashtable (port, sock_id)
htable_insert(udp_port_table, (int)port, sockfd);
RTE_LOG(INFO, SYSCALL, "Binding port %d to sock_id %d\n", port, sockfd);
RTE_LOG(INFO, SYSCALL, "Binding port %d to sock_id %d\n", ntohs(port), sockfd);
return 0;
}
......@@ -174,23 +275,6 @@ static int sendto_validate_args(int sockfd, const void *buf, size_t len, int fla
return 0;
}
// TODO move this elsewhere
/*
 * Pick a random UDP port that has no entry in udp_port_table.
 *
 * Returns the port number, or -1 when the number of active zones has reached
 * NUM_SOCKETS_MAX. Candidates are drawn from rte_rand() truncated to 16 bits
 * and retried until one is not found in the table.
 */
static int get_free_udp_port(void)
{
    // No port available
    if (exch_zone_desc->n_zones_active == NUM_SOCKETS_MAX) {
        return -1;
    }

    // Keep drawing random ports until an unused one comes up
    for (;;) {
        const int candidate = (uint16_t)rte_rand();

        if (htable_lookup(udp_port_table, candidate) == -1) {
            return candidate;
        }
    }
}
ssize_t udpdk_sendto(int sockfd, const void *buf, size_t len, int flags,
const struct sockaddr *dest_addr, socklen_t addrlen)
{
......@@ -212,7 +296,7 @@ ssize_t udpdk_sendto(int sockfd, const void *buf, size_t len, int flags,
memset(&saddr_in, 0, sizeof(saddr_in));
saddr_in.sin_family = AF_INET;
saddr_in.sin_addr.s_addr = INADDR_ANY;
saddr_in.sin_port = get_free_udp_port();
saddr_in.sin_port = btable_get_free_port();
if (udpdk_bind(sockfd, (const struct sockaddr *)&saddr_in, sizeof(saddr_in)) < 0) {
RTE_LOG(ERR, SYSCALL, "Send failed to bind\n");
return -1;
......@@ -242,7 +326,12 @@ ssize_t udpdk_sendto(int sockfd, const void *buf, size_t len, int flags,
ip_hdr->time_to_live = IP_DEFTTL;
ip_hdr->next_proto_id = IPPROTO_UDP;
ip_hdr->packet_id = 0;
ip_hdr->src_addr = exch_zone_desc->slots[sockfd].ip_addr.s_addr;
if ((exch_zone_desc->slots[sockfd].bound)
&& (exch_zone_desc->slots[sockfd].ip_addr.s_addr != INADDR_ANY)) {
ip_hdr->src_addr = exch_zone_desc->slots[sockfd].ip_addr.s_addr;
} else {
ip_hdr->src_addr = config.src_ip_addr.s_addr;
}
ip_hdr->dst_addr = dest_addr_in->sin_addr.s_addr;
ip_hdr->total_length = rte_cpu_to_be_16(len + sizeof(*ip_hdr) + sizeof(*udp_hdr));
ip_hdr->hdr_checksum = rte_ipv4_cksum(ip_hdr);
......@@ -415,9 +504,15 @@ int udpdk_close(int s)
return -1;
}
// Unbind
if (exch_zone_desc->slots[s].bound) {
btable_del_binding(s, exch_zone_desc->slots[s].udp_port);
}
// Reset slot
exch_zone_desc->slots[s].bound = 0;
exch_zone_desc->slots[s].used = 0;
exch_zone_desc->slots[s].so_options = 0;
// Decrement counter of active slots
exch_zone_desc->n_zones_active++;
......
......@@ -17,20 +17,31 @@
#include <rte_memzone.h>
#include <netinet/in.h>
#include <stdbool.h>
#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>
#include "udpdk_constants.h"
#include "udpdk_list.h"
// Role of an exchange ring. NOTE(review): presumably RX = packets towards the application, TX = packets from it - confirm.
enum exch_ring_func {EXCH_RING_RX, EXCH_RING_TX};
/*
 * Describes one socket bound to a UDP port. The poller walks a list of these
 * for the destination port of each received packet and delivers the packet to
 * `sockfd` when `ip_addr` equals the packet's destination address or is
 * INADDR_ANY; it keeps scanning further bindings only if reuse_addr or
 * reuse_port is set on the matching entry.
 */
struct bind_info {
int sockfd; // socket fd of the (addr, port) pair
struct in_addr ip_addr; // IPv4 address associated to the socket
bool reuse_addr; // SO_REUSEADDR
bool reuse_port; // SO_REUSEPORT
bool closed; // mark this binding as closed
};
/*
 * Per-socket bookkeeping, stored in an array indexed by socket descriptor
 * (exch_zone_desc->slots[sockfd]). Filled in when a socket is created and
 * bound, and reset when it is closed.
 */
struct exch_slot_info {
int used; // used by an open socket
int bound; // used by a socket that did 'bind'
int sockfd; // TODO redundant because it matches the slot index in this implementation
int udp_port; // UDP port associated to the socket (only if bound)
struct in_addr ip_addr; // IPv4 address associated to the socket (only if bound)
int so_options; // socket options
} __rte_cache_aligned;
struct exch_zone_info {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment