Kernel Module Snippets – Part 3 – UDP in the kernel

Kernel Module Snippets – Part 3 – UDP in the kernel

Intro

A part of the project I’ve been working on requires the use of a virtual network device. Traditionally, this would be a tun/tap device. The user-space program would register a new tunnel device connected to /dev/net/tun, which presents the user with a tun0 device to which you can assign an IP address etc. IP traffic can then be routed through this device, via whatever “magic” the user-space application (like OpenVPN) conjures up, and out via a matching equivalent on (usually) another system. This creates a tunnel out of the two tun0 devices, where the user-space application acts in place of the physical media (think virtual network cable).

As I mentioned at the start of this series, we’re digging into kernel-space code because we need something a little faster on some very low-power, low-cost devices. There is a way to implement a virtual network device in-kernel too. You register your device, and can present the user with a net device (tun0) to use. But that (I think) is where the equivalence to tun/tap ends. I’ll show an example of setting up a virtual network device from a kernel module below.

The Code

This time around I’m not going to show a full example, just some code snippets to get you going.

To start off, we need to register with the current network namespace; this gives us a place to store important things, like pointers to sockets, which we’ll need later.

   
  


pernet_operations



/\* This goes into your .h file \*/

struct my_net {

    struct list_head my_list;

    struct mutex my_lock;

};

/\* This at the top of your module.c \*/

static unsigned int my_net_id;

static __net_init int my_init_net(struct net *net)

{

    struct my_net *mn = net_generic(net, my_net_id);

 

    INIT_LIST_HEAD(&mn->my_list);

    mutex_init(&mn->my_lock);

    return 0;

}

 

static __net_exit void my_exit_net(struct net *net)

{

    /\* Close any sockets and free any netns specific stuff \*/

}

 

/\* Struct containing pointers to the above functions \*/

static struct pernet_operations my_net_ops = {

    .init = my_init_net,

    .exit = my_exit_net,

    .id   = &my_net_id,

    .size = sizeof(struct my_net),

};

 

/*
 * Module entry point (marked __init).
 * Registers my_net_ops through register_pernet_subsys() and jumps to the
 * common exit label when that fails. The "..." lines are placeholders for
 * steps this snippet leaves out.
 *
 * Returns whatever value rc holds when the err label is reached.
 */
static int __init my_init_module(void)

{

    int rc;

    ...

    rc = register_pernet_subsys(&my_net_ops);

    if (rc)

        goto err;

    ...

    /* Single exit point: hands the current rc back to the caller. */
err:

    return rc;

}

/*
 * Module exit point (marked __exit).
 * Undoes the register_pernet_subsys() call made in my_init_module(); the
 * "..." lines are placeholders for steps this snippet leaves out.
 */
static void __exit my_cleanup_module(void)

{

    ...

    unregister_pernet_subsys(&my_net_ops);

    ...

}

  

With that done, we can now jump into registering the network interface.

We’ll do that in much the same way: by creating a struct that points to a few key functions, using that struct to register, and then adding in some config data. We also need to add calls to the module init/exit functions to get things going.

Now this is quite a chunk of code so take it one piece at a time and work from the bottom up.

   
  

static int my_init(struct net_device *dev)

{

    struct my_dev *mydev = netdev_priv(dev);

    int err;

 

    dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);

    if (!dev->tstats)

        return -ENOMEM;

 

    err = gro_cells_init(&mydev->gro_cells, dev);

    if (err) {

        free_percpu(dev->tstats);

        return err;

    }

    return 0;

}

 

/*
 * ndo_uninit handler (see my_netdev_ops).
 * Releases what my_init() set up: first the GRO cells, then the per-CPU
 * stats in dev->tstats.
 */
static void my_uninit(struct net_device *dev)
{
    struct my_dev *priv = netdev_priv(dev);

    gro_cells_destroy(&priv->gro_cells);
    free_percpu(dev->tstats);
}

 

/*
 * ndo_open handler (see my_netdev_ops): starts the device's transmit queue.
 * Always returns 0.
 */
static int my_open(struct net_device *dev)
{
    netif_start_queue(dev);

    return 0;
}

 

/*
 * ndo_stop handler (see my_netdev_ops): stops the device's transmit queue.
 * Always returns 0.
 */
static int my_stop(struct net_device *dev)
{
    netif_stop_queue(dev);

    return 0;
}

 

static netdev_tx_t my_xmit(struct sk_buff \*skb, struct net_device \*dev)

{

    /\* This is where the magic happens \*/

    ...

    return NETDEV_TX_OK;

}

 

/*
 * ndo_fill_metadata_dst handler (see my_netdev_ops).
 * Stub: ignores both arguments and returns 0.
 *
 * The markdown backslash escapes in the signature were removed so the
 * declaration is valid C.
 */
static int my_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
    return 0;
}

 

static const struct net_device_ops my_netdev_ops = {

    .ndo_init               = my_init,

    .ndo_uninit             = my_uninit,

    .ndo_open               = my_open,

    .ndo_stop               = my_stop,

    .ndo_start_xmit         = my_xmit,

    .ndo_get_stats64        = ip_tunnel_get_stats64,

    .ndo_fill_metadata_dst  = my_fill_metadata_dst,

};

 

/*
 * Netlink attribute policy referenced by my_link_ops.policy.
 * No per-attribute entries are defined yet, so the table is left empty.
 *
 * The markdown-escaped "[" in the declarator was unescaped so the
 * declaration is valid C.
 */
static const struct nla_policy my_policy[IFLA_MY_MAX + 1] = {
};

 

/\* Info for udev, that this is a virtual tunnel endpoint \*/

static struct device_type my_type = {

    .name = "my",

};

 

/\* Initialize the device structure. \*/

static void my_setup(struct net_device *dev)

{

    dev->netdev_ops = &my_netdev_ops;

    dev->needs_free_netdev = true;

    SET_NETDEV_DEVTYPE(dev, &my_type);

    dev->features    |= NETIF_F_SG | NETIF_F_HW_CSUM;

    dev->features    |= NETIF_F_RXCSUM;

    dev->features    |= NETIF_F_GSO_SOFTWARE;

    dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;

    dev->hw_features |= NETIF_F_GSO_SOFTWARE;

    dev->hard_header_len = 0;

    dev->addr_len = 0;

    dev->mtu = ETH_DATA_LEN;

    dev->min_mtu = IPV4_MIN_MTU;

    dev->max_mtu = IP_MAX_MTU;

    dev->type = ARPHRD_NONE;

    netif_keep_dst(dev);

    dev->priv_flags |= IFF_NO_QUEUE;

    dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;

}

 

/*
 * rtnl_link_ops.validate callback.
 * Stub: performs no checks on the supplied attributes and returns 0.
 *
 * The markdown backslash escapes in the signature were removed so the
 * declaration is valid C.
 */
static int my_validate(struct nlattr *tb[], struct nlattr *data[],
                       struct netlink_ext_ack *extack)
{
    return 0;
}

 

/*
 * Fill *conf from the netlink attributes in data.
 * As written, no attribute is parsed: the configuration is zeroed and the
 * id is left at 0. data and extack are unused.
 *
 * The markdown backslash escapes in the signature were removed so the
 * declaration is valid C.
 *
 * Returns 0 unconditionally.
 */
static int my2info(struct nlattr *data[], struct my_dev_cfg *conf,
                   struct netlink_ext_ack *extack)
{
    memset(conf, 0, sizeof(*conf));

    /* Already zero after the memset; presumably the spot for a parsed id. */
    conf->id = 0;

    return 0;
}

 

/*
 * Look up a device in the namespace's device list by configuration id.
 *
 * mn:   per-namespace state whose my_dev_list is searched
 * conf: configuration carrying the id to match
 *
 * Returns the matching my_dev, or NULL when no entry has that id.
 *
 * The list_empty() guard was dropped: list_for_each_entry() simply does not
 * iterate over an empty list. The search now stops at the first match;
 * my_configure() refuses to add a second device with an id that is already
 * present, so at most one entry can match.
 */
static struct my_dev *my_find_dev(struct my_net *mn,
                                  const struct my_dev_cfg *conf)
{
    struct my_dev *mydev;

    list_for_each_entry(mydev, &mn->my_dev_list, next) {
        if (mydev->id == conf->id)
            return mydev;
    }

    return NULL;
}

 

static int my_configure(struct net \*net, struct net_device \*dev,

                 struct my_dev_cfg *conf)

{

    struct my_net *mn = net_generic(net, my_net_id);

    struct my_dev \*t, \*mydev = netdev_priv(dev);

    int err;

 

    if(!mydev)

        return -EBUSY;

 

    mydev->net = net;

    mydev->dev = dev;

    t = my_find_dev(mn, conf);

    if (t)

        return -EBUSY;

 

    mydev->id = conf->id;

    err = register_netdevice(dev);

    if (err)

        return err;

 

    list_add(&mydev->next, &mn->my_dev_list);

    return 0;

}

 

/*
 * Apply generic link attributes to the device.
 * Only IFLA_MTU is handled: when present, its u32 value is passed to
 * dev_set_mtu().
 *
 * The markdown backslash escapes in the signature and array subscripts were
 * removed so the code is valid C.
 *
 * Returns 0 when the attribute is absent, otherwise the result of
 * dev_set_mtu().
 */
static int my_link_config(struct net_device *dev, struct nlattr *tb[])
{
    if (!tb[IFLA_MTU])
        return 0;

    return dev_set_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
}

 

static int my_newlink(struct net \*net, struct net_device \*dev,

               struct nlattr \*tb\[], struct nlattr \*data\[],

               struct netlink_ext_ack *extack)

{

    struct my_dev_cfg conf;

    int err;

 

    err = my2info(data, &conf, extack);

    if (err)

        return err;

 

    err = my_configure(net, dev, &conf);

    if (err)

        return err;

 

    err = my_link_config(dev, tb);

    if (err)

        return err;

 

    return 0;

}

 

/*
 * rtnl_link_ops.dellink callback: remove one "my" device.
 * Unlinks the device from the list it was added to in my_configure() and
 * queues it for unregistration on head.
 *
 * The markdown backslash escapes in the signature were removed so the
 * declaration is valid C.
 */
static void my_dellink(struct net_device *dev, struct list_head *head)
{
    struct my_dev *mydev = netdev_priv(dev);

    list_del(&mydev->next);
    unregister_netdevice_queue(dev, head);
}

 

/*
 * rtnl_link_ops.get_size callback.
 * Reports 0 bytes, matching my_link_fill_info(), which writes no attributes.
 */
static size_t my_get_size(const struct net_device *dev)
{
    const size_t attr_bytes = 0;

    return attr_bytes;
}

 

/*
 * rtnl_link_ops.fill_info callback.
 * Stub: adds nothing to skb and returns 0.
 *
 * The markdown backslash escapes in the signature were removed so the
 * declaration is valid C.
 */
static int my_link_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
    return 0;
}

 

/*
 * rtnetlink link operations for devices of kind "my".
 * Ties together the private-data size (struct my_dev), the attribute policy
 * and the setup/validate/newlink/dellink/get_size/fill_info callbacks
 * defined above.
 */
static struct rtnl_link_ops my_link_ops __read_mostly = {
    .kind = "my",
    .priv_size = sizeof(struct my_dev),
    .setup = my_setup,

    /* Netlink attribute handling. */
    .maxtype = IFLA_MY_MAX,
    .policy = my_policy,
    .validate = my_validate,

    /* Link lifecycle. */
    .newlink = my_newlink,
    .dellink = my_dellink,

    /* Dumping link info back to user space. */
    .get_size = my_get_size,
    .fill_info = my_link_fill_info,
};

 

/\* module init \*/

/*
 * Registers my_link_ops; the result lands in rc.
 * NOTE(review): presumably this statement belongs inside my_init_module(),
 * which declares rc - confirm placement and error handling.
 */
rc = rtnl_link_register(&my_link_ops);

 

/\* module cleanup \*/

/*
 * Undoes the rtnl_link_register() call shown above.
 * NOTE(review): presumably this belongs inside my_cleanup_module() - confirm.
 */
rtnl_link_unregister(&my_link_ops);

  

So if you look through the code, you’ll find the magic “_xmit” function, but also note that there is no matching “_recv” function. This caught me off guard originally, but then it occurred to me to think in full-duplex and it all made sense. Each component, each side of the equation, has a _xmit – and that’s what we need to implement. Once the packet gets into the queue, it will get delivered to where it needs to go. We just need to make sure our _xmit delivers to the right queue in the right way. For that, we use netif_receive_skb(skb).

Now that we’re transmitting packets, we’ll follow up with how to receive in the next article.

Categories: Development, Training

By Rob Hartzenberg

September 24, 2022

Rob Hartzenberg
Author: Rob Hartzenberg

Linux Engineer

PREVIOUS

Kernel Module Snippets - Part 5 - Encryption

NEXT

Kernel Module Snippets – Part 1 – Hello World