Linux system development, Software

Getting Linux routing table using netlink.

In the previous article we discussed monitoring network interfaces using netlink. Now it’s time to do something more complex and interesting.
Let’s discover how to get and print the system routing table, like the “ip route” command does.

The routing table is a runtime in-memory data structure that stores the routes (and in some cases, metrics associated with those routes) to particular network destinations. It is essential to TCP/IP: the network stack uses this table to decide where and how to send packets destined for a given network.
The Linux kernel supports multiple routing tables. Beyond the two commonly used routing tables (the local and main routing tables), the kernel supports up to 252 additional routing tables.
The multiple routing table system provides a flexible infrastructure on top of which to implement policy routing. By allowing multiple traditional routing tables (keyed primarily to destination address) to be combined with the routing policy database (RPDB) (keyed primarily to source address), the kernel supports a well-known and well-understood interface while simultaneously expanding and extending its routing capabilities.

To get the Linux main routing table we can use the commands “route -n”, “netstat -rn” and “ip route”:

$route -n

Destination Gateway Genmask Flags Metric Ref Use Iface
0.0.0.0 192.168.8.1 0.0.0.0 UG 0 0 0 eth0
169.254.0.0 0.0.0.0 255.255.0.0 U 1000 0 0 eth0
192.168.8.0 0.0.0.0 255.255.255.0 U 0 0 0 eth0

The first two utilities use the classic ioctl interface to get information from the kernel. This approach is limited and is now deprecated.
In contrast, “ip route” is based on netlink sockets, and now we will discover how it works.

As in the monitor, everything starts with creating a netlink socket and binding it. Binding here is very important: it allows us to execute this program as a normal user.

/* Create a raw socket speaking the NETLINK_ROUTE protocol */
int nl_sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

if (nl_sock < 0) {
	perror("Failed to open netlink socket");
	return -1;
}

/* Local netlink address: clear it, then fill in the family and our PID */
struct sockaddr_nl saddr;

memset(&saddr, 0, sizeof saddr);
saddr.nl_family = AF_NETLINK;
saddr.nl_pid = getpid();

/* Attach the address to the socket; give the descriptor back on failure */
int bind_rc = bind(nl_sock, (struct sockaddr *)&saddr, sizeof saddr);

if (bind_rc < 0) {
	perror("Failed to bind to netlink socket");
	close(nl_sock);
	return -1;
}

Now it’s time to send the request to the kernel.

/* Request struct */
struct {
	struct nlmsghdr nlh;  /* Netlink header */
	struct rtmsg rtm;     /* Payload - route message */
} nl_request;

nl_request.nlh.nlmsg_type = RTM_GETROUTE;  /* We wish to get routes */
nl_request.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
nl_request.nlh.nlmsg_len = sizeof(nl_request);
nl_request.nlh.nlmsg_seq = time(NULL);
nl_request.rtm.rtm_family = AF_INET;

ssize_t sent = send(sock, &nl_request, sizeof(nl_request), 0);

if (sent < 0) {
	perror("Failed to perfom request");
	close(nl_sock);
	return -1;
}

We need to declare a request structure that describes a netlink packet with a header and some payload – the actual message.
In the header we specify what we need: RTM_GETROUTE as the message type, which can return the main routing table.
The additional flags “NLM_F_REQUEST | NLM_F_DUMP” tell the kernel that this is a dump request.
As rtm_family we can specify AF_INET if we want to get the table for the IPv4 protocol, or AF_INET6 for IPv6.

Getting the kernel response is more complex.
We need to perform a vectored read using the already familiar recvmsg and struct iovec.
For simplicity, this code is split into three functions.

At the lowest level is a simple wrapper around recvmsg which is more robust and can handle “busy” states.

/*
 * Thin wrapper around recvmsg() that retries the call while it fails with
 * EINTR or EAGAIN.
 *
 * fd    - socket to read from
 * msg   - message header (with its iovec) handed to recvmsg()
 * flags - recvmsg() flags, e.g. MSG_PEEK | MSG_TRUNC
 *
 * Returns the positive length reported by recvmsg(), -errno if the call
 * failed, or -ENODATA if recvmsg() returned 0.
 */
int rtnl_receive(int fd, struct msghdr *msg, int flags)
{
    int len;

    /* Try to read the message in case of busy or interrupted call */
    do {
        len = recvmsg(fd, msg, flags);
    } while (len < 0 && (errno == EINTR || errno == EAGAIN));

    if (len < 0) {
        /* Save errno first: perror() is allowed to modify it */
        int saved_errno = errno;

        perror("Netlink receive failed");
        return -saved_errno;
    }

    if (len == 0) {
        /* No call failed here, so errno is stale and perror() would mislead */
        fprintf(stderr, "EOF on netlink\n");
        return -ENODATA;
    }

    return len;
}

Receive is called from the rtnl_recvmsg function, which reads the message size first, then allocates a buffer using that size and reads the actual response message.

/*
 * Read one netlink datagram from fd into a freshly allocated buffer.
 *
 * The socket is first peeked with an empty iovec and MSG_PEEK | MSG_TRUNC to
 * obtain the size of the pending message; a buffer of that size is then
 * allocated and the message is read for real.
 *
 * On success returns the received length and stores the buffer in *answer
 * (the caller must free() it). On failure returns a negative errno-style
 * value and leaves *answer untouched.
 */
static int rtnl_recvmsg(int fd, struct msghdr *msg, char **answer)
{
    struct iovec *vec = msg->msg_iov;
    char *payload;
    int size;
    int got;

    /* Empty iovec: the peek only reports the size, nothing is consumed */
    vec->iov_base = NULL;
    vec->iov_len = 0;

    size = rtnl_receive(fd, msg, MSG_PEEK | MSG_TRUNC);
    if (size < 0)
        return size;

    /* Room for exactly the pending message */
    payload = malloc(size);
    if (payload == NULL) {
        perror("malloc failed");
        return -ENOMEM;
    }

    vec->iov_base = payload;
    vec->iov_len = size;

    /* Second pass actually removes the message from the socket queue */
    got = rtnl_receive(fd, msg, 0);
    if (got < 0) {
        free(payload);
        return got;
    }

    *answer = payload;
    return got;
}

Here struct iovec is passed from the top-level function get_route_dump_response.

/*
 * Receive the kernel's answer to a route dump request and print every route.
 *
 * A dump may arrive as several datagrams, each holding several netlink
 * messages; datagrams are read until NLMSG_DONE (or an error) is seen.
 *
 * sock - bound NETLINK_ROUTE socket on which the dump request was sent
 *
 * Returns the total number of bytes received on success, or a negative
 * value if receiving failed, the dump was interrupted, or the kernel
 * reported an error.
 */
int get_route_dump_response(int sock)
{
    struct sockaddr_nl nladdr;
    struct iovec iov;
    struct msghdr msg = {
        .msg_name = &nladdr,
        .msg_namelen = sizeof(nladdr),
        .msg_iov = &iov,
        .msg_iovlen = 1,
    };
    int total = 0;
    int header_printed = 0;

    for (;;) {
        char *buf = NULL;
        struct nlmsghdr *h;
        int msglen;
        int status;

        /* recvmsg() overwrites msg_namelen, so restore it for every read */
        msg.msg_namelen = sizeof(nladdr);

        /* Get the next datagram; buf is only valid when this succeeds */
        status = rtnl_recvmsg(sock, &msg, &buf);

        if (status < 0) {
            return status;
        }

        /* Only the kernel (pid 0) is trusted: drop the whole datagram */
        if (nladdr.nl_pid != 0) {
            free(buf);
            continue;
        }

        if (!header_printed) {
            printf("Main routing table IPv4\n");
            header_printed = 1;
        }

        total += status;
        msglen = status;

        /* Iterate through all messages in buffer */
        for (h = (struct nlmsghdr *)buf; NLMSG_OK(h, msglen); h = NLMSG_NEXT(h, msglen)) {
            if (h->nlmsg_flags & NLM_F_DUMP_INTR) {
                fprintf(stderr, "Dump was interrupted\n");
                free(buf);
                return -1;
            }

            /* End of the multipart dump: nothing more will follow */
            if (h->nlmsg_type == NLMSG_DONE) {
                free(buf);
                return total;
            }

            if (h->nlmsg_type == NLMSG_ERROR) {
                struct nlmsgerr *err = NLMSG_DATA(h);
                int code = -EIO; /* used when the error payload is truncated */

                if (h->nlmsg_len >= NLMSG_LENGTH(sizeof(*err))) {
                    code = err->error;
                }

                /* error == 0 is an acknowledgement, not a failure */
                if (code == 0) {
                    continue;
                }

                /* The kernel passes a negative errno value in the payload */
                fprintf(stderr, "netlink reported error: %s\n", strerror(-code));
                free(buf);
                return code;
            }

            /* Decode and print single message */
            print_route(h);
        }

        free(buf);
    }
}

A netlink message can be split into parts, so this function tries to read all of those parts.
After successfully receiving the message we can call the printer function print_route.

/*
 * Index a chain of route attributes by attribute type.
 *
 * tb  - output table with room for max + 1 entries; cleared first
 * max - highest attribute type to record; larger types are ignored
 * rta - first attribute of the chain
 * len - number of bytes occupied by the chain
 *
 * When a type occurs more than once, the last occurrence is kept.
 */
void parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
{
    int slot;

    for (slot = 0; slot <= max; slot++)
        tb[slot] = NULL;

    /* RTA_OK / RTA_NEXT step through the chain and keep len in sync */
    for (; RTA_OK(rta, len); rta = RTA_NEXT(rta, len)) {
        unsigned short kind = rta->rta_type;

        if (kind > max)
            continue;

        tb[kind] = rta;
    }
}

/*
 * Return the routing table id of a route message.
 *
 * The RTA_TABLE attribute, when present in tb, takes precedence over the
 * rtm_table field of the route header.
 */
static inline int rtm_get_table(struct rtmsg *r, struct rtattr **tb)
{
    struct rtattr *ext = tb[RTA_TABLE];

    if (!ext)
        return r->rtm_table;

    return *(__u32 *)RTA_DATA(ext);
}

The printer function uses two auxiliary functions for parsing the message. One of these functions is already known from the network monitor.
Both of these functions perform a simple iteration over memory, some type conversion and alignment.
Finally we are ready to print the route.

/*
 * Decode one netlink route message and print it on a single line in an
 * "ip route"-like form: destination, then optional "via", "dev" and "src".
 *
 * Only IPv4 routes that belong to the main routing table are printed;
 * messages that are not RTM_NEWROUTE are ignored.
 *
 * nl_header_answer - header of the netlink message to print
 */
void print_route(struct nlmsghdr* nl_header_answer)
{
    struct rtmsg* r = NLMSG_DATA(nl_header_answer);
    int len = nl_header_answer->nlmsg_len;
    struct rtattr* tb[RTA_MAX+1];
    int table;
    char buf[256];
    const char *text;

    /* Dump replies carry RTM_NEWROUTE; anything else has no route payload */
    if (nl_header_answer->nlmsg_type != RTM_NEWROUTE) {
        return;
    }

    len -= NLMSG_LENGTH(sizeof(*r));

    if (len < 0) {
        /* errno is not set here, so perror() would print a stale reason */
        fprintf(stderr, "Wrong message length\n");
        return;
    }

    /* Parse message */
    parse_rtattr(tb, RTA_MAX, RTM_RTA(r), len);

    table = rtm_get_table(r, tb);

    /* Skip the route unless it is BOTH IPv4 and in the main table */
    if (r->rtm_family != AF_INET || table != RT_TABLE_MAIN) {
        return;
    }

    /* Read destination address from the tb at RTA_DST index */
    if (tb[RTA_DST]) {
        /* Print readable address using inet_ntop, whatever the prefix length */
        text = inet_ntop(r->rtm_family, RTA_DATA(tb[RTA_DST]), buf, sizeof(buf));
        printf("%s/%u", text ? text : "?", r->rtm_dst_len);
    } else if (r->rtm_dst_len) {
        printf("0/%u", r->rtm_dst_len);
    } else {
        printf("default");
    }

    /* Every following field brings its own leading space */
    if (tb[RTA_GATEWAY]) {
        text = inet_ntop(r->rtm_family, RTA_DATA(tb[RTA_GATEWAY]), buf, sizeof(buf));
        printf(" via %s", text ? text : "?");
    }

    if (tb[RTA_OIF]) {
        char if_nam_buf[IF_NAMESIZE];
        unsigned int ifidx = *(__u32 *)RTA_DATA(tb[RTA_OIF]);

        /* if_indextoname() returns NULL when the index cannot be resolved */
        text = if_indextoname(ifidx, if_nam_buf);

        if (text) {
            printf(" dev %s", text);
        } else {
            printf(" dev if%u", ifidx);
        }
    }

    if (tb[RTA_SRC]) {
        text = inet_ntop(r->rtm_family, RTA_DATA(tb[RTA_SRC]), buf, sizeof(buf));
        printf(" src %s", text ? text : "?");
    }

    printf("\n");
}

At the beginning this function parses the message and performs some checks.
Then we can easily access different parts of the routing message using an array and indices with readable defines.

To get an IPv4 address in human-readable text form, the standard inet_ntop is used.
A network interface is represented by a numeric index, which can be converted to a readable form (like “eth0”) using if_indextoname.
This function requires a pre-allocated buffer of IF_NAMESIZE bytes.

Now all together:

/*
 *
 */

#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <sys/types.h>
#include <unistd.h>
#include <time.h>
#include <stdio.h>
#include <net/if.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <linux/rtnetlink.h>

/*
 * Thin wrapper around recvmsg() that retries the call while it fails with
 * EINTR or EAGAIN.
 *
 * fd    - socket to read from
 * msg   - message header (with its iovec) handed to recvmsg()
 * flags - recvmsg() flags, e.g. MSG_PEEK | MSG_TRUNC
 *
 * Returns the positive length reported by recvmsg(), -errno if the call
 * failed, or -ENODATA if recvmsg() returned 0.
 */
int rtnl_receive(int fd, struct msghdr *msg, int flags)
{
	int len;

	/* Retry while the call is merely interrupted or the socket is busy */
	do {
		len = recvmsg(fd, msg, flags);
	} while (len < 0 && (errno == EINTR || errno == EAGAIN));

	if (len < 0) {
		/* Save errno first: perror() is allowed to modify it */
		int saved_errno = errno;

		perror("Netlink receive failed");
		return -saved_errno;
	}

	if (len == 0) {
		/* No call failed here, so errno is stale and perror() would mislead */
		fprintf(stderr, "EOF on netlink\n");
		return -ENODATA;
	}

	return len;
}

/*
 * Read one netlink datagram from fd into a freshly allocated buffer.
 *
 * The socket is first peeked with an empty iovec and MSG_PEEK | MSG_TRUNC;
 * the length returned by that call is used to allocate the buffer into
 * which the message is then read for real.
 *
 * On success returns the received length and stores the buffer in *answer
 * (the caller must free() it). On failure returns a negative errno-style
 * value and leaves *answer untouched.
 */
static int rtnl_recvmsg(int fd, struct msghdr *msg, char **answer)
{
	struct iovec *vec = msg->msg_iov;
	char *payload;
	int size;
	int got;

	/* Empty iovec for the peek: no data is copied out at this stage */
	vec->iov_base = NULL;
	vec->iov_len = 0;

	size = rtnl_receive(fd, msg, MSG_PEEK | MSG_TRUNC);
	if (size < 0)
		return size;

	/* Room for exactly the length reported by the peek */
	payload = malloc(size);
	if (payload == NULL) {
		perror("malloc failed");
		return -ENOMEM;
	}

	vec->iov_base = payload;
	vec->iov_len = size;

	/* Second pass, without MSG_PEEK, fills the buffer */
	got = rtnl_receive(fd, msg, 0);
	if (got < 0) {
		free(payload);
		return got;
	}

	*answer = payload;
	return got;
}

/*
 * Index a chain of route attributes by attribute type.
 *
 * tb  - output table with room for max + 1 entries; cleared first
 * max - highest attribute type to record; larger types are ignored
 * rta - first attribute of the chain
 * len - number of bytes occupied by the chain
 *
 * When a type occurs more than once, the last occurrence is kept.
 */
void parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
{
	int slot;

	for (slot = 0; slot <= max; slot++)
		tb[slot] = NULL;

	/* RTA_OK / RTA_NEXT step through the chain and keep len in sync */
	for (; RTA_OK(rta, len); rta = RTA_NEXT(rta, len)) {
		unsigned short kind = rta->rta_type;

		if (kind > max)
			continue;

		tb[kind] = rta;
	}
}

/*
 * Return the routing table id of a route message.
 *
 * The RTA_TABLE attribute, when present in tb, takes precedence over the
 * rtm_table field of the route header.
 */
static inline int rtm_get_table(struct rtmsg *r, struct rtattr **tb)
{
	struct rtattr *ext = tb[RTA_TABLE];

	if (!ext)
		return r->rtm_table;

	return *(__u32 *)RTA_DATA(ext);
}

/*
 * Decode one netlink route message and print it on a single line in an
 * "ip route"-like form: destination, then optional "via", "dev" and "src".
 *
 * Only IPv4 routes that belong to the main routing table are printed;
 * messages that are not RTM_NEWROUTE are ignored.
 *
 * nl_header_answer - header of the netlink message to print
 */
void print_route(struct nlmsghdr* nl_header_answer)
{
	struct rtmsg* r = NLMSG_DATA(nl_header_answer);
	int len = nl_header_answer->nlmsg_len;
	struct rtattr* tb[RTA_MAX+1];
	int table;
	char buf[256];
	const char *text;

	/* Dump replies carry RTM_NEWROUTE; anything else has no route payload */
	if (nl_header_answer->nlmsg_type != RTM_NEWROUTE) {
		return;
	}

	len -= NLMSG_LENGTH(sizeof(*r));

	if (len < 0) {
		/* errno is not set here, so perror() would print a stale reason */
		fprintf(stderr, "Wrong message length\n");
		return;
	}

	/* Build the attribute index, then resolve the table id */
	parse_rtattr(tb, RTA_MAX, RTM_RTA(r), len);

	table = rtm_get_table(r, tb);

	/* Skip the route unless it is BOTH IPv4 and in the main table */
	if (r->rtm_family != AF_INET || table != RT_TABLE_MAIN) {
		return;
	}

	/* Destination: explicit prefix of any length, bare length, or default */
	if (tb[RTA_DST]) {
		text = inet_ntop(r->rtm_family, RTA_DATA(tb[RTA_DST]), buf, sizeof(buf));
		printf("%s/%u", text ? text : "?", r->rtm_dst_len);
	} else if (r->rtm_dst_len) {
		printf("0/%u", r->rtm_dst_len);
	} else {
		printf("default");
	}

	/* Every following field brings its own leading space */
	if (tb[RTA_GATEWAY]) {
		text = inet_ntop(r->rtm_family, RTA_DATA(tb[RTA_GATEWAY]), buf, sizeof(buf));
		printf(" via %s", text ? text : "?");
	}

	if (tb[RTA_OIF]) {
		char if_nam_buf[IF_NAMESIZE];
		unsigned int ifidx = *(__u32 *)RTA_DATA(tb[RTA_OIF]);

		/* if_indextoname() returns NULL when the index cannot be resolved */
		text = if_indextoname(ifidx, if_nam_buf);

		if (text) {
			printf(" dev %s", text);
		} else {
			printf(" dev if%u", ifidx);
		}
	}

	if (tb[RTA_SRC]) {
		text = inet_ntop(r->rtm_family, RTA_DATA(tb[RTA_SRC]), buf, sizeof(buf));
		printf(" src %s", text ? text : "?");
	}

	printf("\n");
}

/*
 * Open a raw NETLINK_ROUTE socket and bind it to an address carrying this
 * process's PID.
 *
 * Returns the socket descriptor, or -1 after reporting the failure with
 * perror().
 */
int open_netlink()
{
	struct sockaddr_nl local;
	int fd;

	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	if (fd < 0) {
		perror("Failed to open netlink socket");
		return -1;
	}

	/* Clear the address, then fill in the family and our PID */
	memset(&local, 0, sizeof local);
	local.nl_family = AF_NETLINK;
	local.nl_pid = getpid();

	if (bind(fd, (struct sockaddr *)&local, sizeof local) >= 0)
		return fd;

	/* Binding failed: report it and give the descriptor back */
	perror("Failed to bind to netlink socket");
	close(fd);
	return -1;
}

/*
 * Send an RTM_GETROUTE dump request for the IPv4 family on a netlink socket.
 *
 * sock - NETLINK_ROUTE socket to send the request on
 *
 * Returns the result of send(): the number of bytes sent, or a negative
 * value on failure (errno is left set by send()).
 */
int do_route_dump_requst(int sock)
{
	/* Netlink header immediately followed by the route message payload */
	struct {
		struct nlmsghdr nlh;
		struct rtmsg rtm;
	} nl_request;

	/*
	 * Zero the whole request first. Every field we do not set explicitly
	 * (nlmsg_pid, rtm_flags, rtm_dst_len, ...) is read by the kernel, so
	 * stack garbage there can change or break the dump.
	 */
	memset(&nl_request, 0, sizeof(nl_request));

	nl_request.nlh.nlmsg_type = RTM_GETROUTE;
	nl_request.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
	nl_request.nlh.nlmsg_len = sizeof(nl_request);
	nl_request.nlh.nlmsg_seq = time(NULL);
	nl_request.rtm.rtm_family = AF_INET;

	return send(sock, &nl_request, sizeof(nl_request), 0);
}

/*
 * Receive the kernel's answer to a route dump request and print every route.
 *
 * A dump may arrive as several datagrams, each holding several netlink
 * messages; datagrams are read until NLMSG_DONE (or an error) is seen.
 *
 * sock - bound NETLINK_ROUTE socket on which the dump request was sent
 *
 * Returns the total number of bytes received on success, or a negative
 * value if receiving failed, the dump was interrupted, or the kernel
 * reported an error.
 */
int get_route_dump_response(int sock)
{
	struct sockaddr_nl nladdr;
	struct iovec iov;
	struct msghdr msg = {
		.msg_name = &nladdr,
		.msg_namelen = sizeof(nladdr),
		.msg_iov = &iov,
		.msg_iovlen = 1,
	};
	int total = 0;
	int header_printed = 0;

	for (;;) {
		char *buf = NULL;
		struct nlmsghdr *h;
		int msglen;
		int status;

		/* recvmsg() overwrites msg_namelen, so restore it for every read */
		msg.msg_namelen = sizeof(nladdr);

		/* buf is only valid when this succeeds */
		status = rtnl_recvmsg(sock, &msg, &buf);

		if (status < 0) {
			return status;
		}

		/* Only the kernel (pid 0) is trusted: drop the whole datagram */
		if (nladdr.nl_pid != 0) {
			free(buf);
			continue;
		}

		if (!header_printed) {
			printf("Main routing table IPv4\n");
			header_printed = 1;
		}

		total += status;
		msglen = status;

		/* Walk every netlink message packed into this datagram */
		for (h = (struct nlmsghdr *)buf; NLMSG_OK(h, msglen); h = NLMSG_NEXT(h, msglen)) {
			if (h->nlmsg_flags & NLM_F_DUMP_INTR) {
				fprintf(stderr, "Dump was interrupted\n");
				free(buf);
				return -1;
			}

			/* End of the multipart dump: nothing more will follow */
			if (h->nlmsg_type == NLMSG_DONE) {
				free(buf);
				return total;
			}

			if (h->nlmsg_type == NLMSG_ERROR) {
				struct nlmsgerr *err = NLMSG_DATA(h);
				int code = -EIO; /* used when the error payload is truncated */

				if (h->nlmsg_len >= NLMSG_LENGTH(sizeof(*err))) {
					code = err->error;
				}

				/* error == 0 is an acknowledgement, not a failure */
				if (code == 0) {
					continue;
				}

				/* The kernel passes a negative errno value in the payload */
				fprintf(stderr, "netlink reported error: %s\n", strerror(-code));
				free(buf);
				return code;
			}

			print_route(h);
		}

		free(buf);
	}
}

/*
 * Open a netlink socket, request a dump of the IPv4 routes and print the
 * kernel's answer.
 *
 * Returns 0 on success and -1 if any step fails.
 */
int main()
{
	int nl_sock = open_netlink();

	/* open_netlink() has already reported the reason with perror() */
	if (nl_sock < 0) {
		return -1;
	}

	if (do_route_dump_requst(nl_sock) < 0) {
		perror("Failed to perfom request");
		close(nl_sock);
		return -1;
	}

	/* A failed or interrupted dump must be visible in the exit status */
	if (get_route_dump_response(nl_sock) < 0) {
		close(nl_sock);
		return -1;
	}

	close(nl_sock);

	return 0;
}

Compilation and execution:

$ gcc routing.c -o routing
$ ./routing
Main routing table IPv4
default via 192.168.8.1 dev eth0
169.254.0.0/16 dev eth0
192.168.8.0/24 dev eth0

That’s it.
In the next article I will show how to delete and add new routes.

Thanks for reading!

Tagged , , , ,

2 thoughts on “Getting Linux routing table using netlink.”

Leave a Reply

This site uses Akismet to reduce spam. Learn how your comment data is processed.