Hello, Habr!
Let's talk about Linux network device drivers, the NAPI mechanism and its changes in the 5.12 kernel.
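The Linux network stack (like the one in BSD) is built around sockets. As in other unix-like systems, an application reaches the TCP/IP stack through the socket system call (sys_socket); below it sit the transport layer, the IP layer and, at the bottom, the network device driver. Data travels through the stack in socket buffers (sk_buff) - one buffer per packet, with no copying between layers. The driver exchanges packets with the stack through transmit and receive queues (tx_queue, rx_queue).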
The socket buffer structure – sk_buff (abridged):
/*
 * struct sk_buff - socket buffer (abridged excerpt; not the full kernel
 * definition).
 *
 * The leading anonymous union overlays the next/prev/dev triple with a
 * struct list_head, so the same storage is viewed either way.
 */
struct sk_buff {
union {
struct {
/* Linkage pointers and the associated device; overlaid by `list` below. */
struct sk_buff *next;
struct sk_buff *prev;
struct net_device *dev;
};
struct list_head list;
};
/* NOTE(review): presumably the socket this buffer belongs to - confirm. */
struct sock *sk;
unsigned int len,
data_len;
__u16 mac_len,
hdr_len;
/*
 * NAPI id; shares storage with sender_cpu. Both exist only when
 * CONFIG_NET_RX_BUSY_POLL or CONFIG_XPS is defined.
 */
#if defined(CONFIG_NET_RX_BUSY_POLL) || defined(CONFIG_XPS)
union {
unsigned int napi_id;
unsigned int sender_cpu;
};
#endif
/* Protocol number and 16-bit header fields for the inner (encapsulated) headers. */
__u8 inner_ipproto;
__u16 inner_transport_header;
__u16 inner_network_header;
__u16 inner_mac_header;
/* Protocol (big-endian) and 16-bit header fields for the outer headers. */
__be16 protocol;
__u16 transport_header;
__u16 network_header;
__u16 mac_header;
/* NOTE(review): tail/end/head/data presumably delimit the data area - confirm against the full kernel header. */
sk_buff_data_t tail;
sk_buff_data_t end;
unsigned char *head,
*data;
unsigned int truesize;
};
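Every network device (at the MAC level) is described by its own structure. It holds the xmit and rx handlers, the queues, the stats counters and so on. This structure is net_device; it is allocated with alloc_netdev and registered with register_netdev.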
The net_device structure (abridged):
/*
 * struct net_device - per-interface device descriptor (abridged excerpt;
 * not the full kernel definition).
 */
struct net_device {
char name[IFNAMSIZ]; // interface name, at most IFNAMSIZ bytes. NOTE(review): the original comment mentioned printf - presumably a printf-style name template is accepted; confirm
unsigned long mem_end;
unsigned long mem_start;
unsigned long base_addr;
unsigned long state;
struct list_head dev_list;
/* List head for NAPI contexts; struct napi_struct has a matching dev_list member. */
struct list_head napi_list;
unsigned int flags;
unsigned int priv_flags;
/* Driver-supplied operation table. */
const struct net_device_ops *netdev_ops;
unsigned short hard_header_len;
unsigned int mtu;
/* Statistics; e1000_clean_rx_irq updates stats.rx_bytes and stats.rx_packets. */
struct net_device_stats stats;
atomic_long_t rx_dropped;
atomic_long_t tx_dropped;
atomic_long_t rx_nohandler;
const struct ethtool_ops *ethtool_ops;
const struct header_ops *header_ops;
unsigned char if_port;
unsigned char dma;
/* Interface address info. */
unsigned char perm_addr[MAX_ADDR_LEN];
unsigned short dev_id;
unsigned short dev_port;
spinlock_t addr_list_lock;
int irq;
unsigned char *dev_addr;
/* Receive queue array and its element count. */
struct netdev_rx_queue *_rx;
unsigned int num_rx_queues;
/* Transmit queue array (cache-line aligned on SMP) and its element count. */
struct netdev_queue *_tx ____cacheline_aligned_in_smp;
unsigned int num_tx_queues;
struct timer_list watchdog_timer;
int watchdog_timeo;
};
: , , . Linux, “” , .
Example: processing received packets (abridged, from the Intel Ethernet e1000 driver):
/*
 * e1000_clean_rx_irq - process completed receive descriptors (abridged
 * excerpt).
 *
 * NOTE(review): this listing is heavily elided. rx_desc, netdev, length,
 * total_rx_bytes, total_rx_packets, cleaned_count and cleaned are used but
 * their declarations and updates are not shown, and nothing visible advances
 * rx_desc or assigns skb/status. Consult the full driver source before
 * relying on it.
 *
 * @adapter:     driver-private adapter state
 * @rx_ring:     receive ring to process
 * @work_done:   not referenced in the visible excerpt
 * @work_to_do:  not referenced in the visible excerpt
 *
 * Returns the value of `cleaned`.
 */
static bool e1000_clean_rx_irq(struct e1000_adapter *adapter, // adapter state
struct e1000_rx_ring *rx_ring, // receive ring
int *work_done, int work_to_do)
{
/* Loop while the descriptor's status has the E1000_RXD_STAT_DD bit set. */
while (rx_desc->status & E1000_RXD_STAT_DD) {
struct sk_buff *skb;
u8 *data;
u8 status;
if (netdev->features & NETIF_F_RXALL) {
/* NOTE(review): the "- 4" presumably excludes the 4-byte frame check sequence - confirm. */
total_rx_bytes += (length - 4);
total_rx_packets++;
/* Hand the buffer on via e1000_receive_skb. */
e1000_receive_skb(adapter, status, rx_desc->special, skb);
}
}
if (cleaned_count) // refill through the adapter's alloc_rx_buf callback
adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count);
// add this pass's totals to the adapter and netdev statistics
adapter->total_rx_packets += total_rx_packets;
adapter->total_rx_bytes += total_rx_bytes;
netdev->stats.rx_bytes += total_rx_bytes;
netdev->stats.rx_packets += total_rx_packets;
return cleaned;
}
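Before kernel 2.3, interrupt handling was split into a top half and a bottom half (BH), with further deferred work placed on a task queue. Since 2.3 the old BH mechanism has been replaced by software interrupts (softirq), tasklets and work queues. Network receive and transmit processing runs in softirq context, outside the hardware interrupt handler. The hardware handler itself only acknowledges the device and schedules the deferred work.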
NAPI
, . . 2001 New API 2.4. ( – SMP-, pktgen).
NAPI - , . NAPI . , , . .
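A NAPI-aware driver works as follows. On the first receive interrupt it disables further receive interrupts and calls rx_schedule, which queues the device for polling. The kernel then calls dev->poll, limiting the number of packets it may process in one pass (the limit is called the budget). poll takes packets from the ring and passes them up the stack until either the ring is empty or the budget is used up. If the ring is empty, the driver leaves polling mode and re-enables interrupts. Otherwise the device stays on the poll list and poll is called again later.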
A polling entry point from the e1000 driver (e1000_netpoll):
/*
 * e1000_netpoll - run the adapter's interrupt handler by hand.
 * @netdev: network device whose private area holds the e1000 adapter
 *
 * Disables the adapter's interrupt line with disable_hardirq(); when that
 * call returns non-zero, e1000_intr() is invoked directly for the same line.
 * The line is re-enabled unconditionally before returning.
 */
static void e1000_netpoll(struct net_device *netdev)
{
	struct e1000_adapter *priv = netdev_priv(netdev);
	const unsigned int irq = priv->pdev->irq;

	if (disable_hardirq(irq)) {
		e1000_intr(irq, netdev);
	}

	enable_irq(irq);
}
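What a NAPI-capable driver needs:
- a DMA ring buffer that can hold several received packets
- a poll method that takes packets from the ring
- poll is called from softirq context or from the ksoftirqd thread, not from the hardware interrupt handler.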
NAPI:
, dev->poll
IRQ-, .
What has changed in NAPI in 5.12?
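Wei Wang proposed a patch set whose main idea is to move NAPI polling into a separate kernel thread. Until now, polling ran in softirq context. Softirq work consumes CPU time that the scheduler does not see, so under heavy traffic it can compete unfairly with userspace processes running on the same CPU. This is hard to control and to account for. A kthread can be bound to a particular CPU, it is visible to the scheduler, and its priority can be tuned like that of any other task.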
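The changes are in net/core/dev.c. A new function, __napi_poll, was split out of napi_poll. A sysfs attribute was added to net_device to switch threaded NAPI on and off without bringing the interface up/down.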
napi_struct gained a field for threaded mode, and the kthread is enabled through napi_set_threaded (which sets the NAPI_STATE_THREADED bit).
The napi_struct structure (abridged):
/*
 * struct napi_struct - per-NAPI-context state (abridged excerpt; the full
 * kernel definition has more members, e.g. the `dev` pointer that
 * napi_kthread_create dereferences).
 */
struct napi_struct {
/* List linkage; struct net_device has a matching napi_list head. */
struct list_head dev_list;
struct hlist_node napi_hash_node;
/* Numeric id; napi_kthread_create formats it into the thread name. */
unsigned int napi_id;
/* Task returned by kthread_run in napi_kthread_create; NULL if creation failed. */
struct task_struct *thread;
};
Creating the polling kernel thread:
/*
 * napi_kthread_create - start the polling kernel thread for a NAPI context.
 * @n: NAPI context; n->thread receives the new task
 *
 * Returns 0 on success. On failure, logs the error, resets n->thread to NULL
 * and returns the error code extracted from kthread_run()'s result.
 */
static int napi_kthread_create(struct napi_struct *n)
{
	int err;

	/* Create and wake up the kthread once to put it in
	 * TASK_INTERRUPTIBLE mode to avoid the blocked task
	 * warning and work with loadavg.
	 */
	n->thread = kthread_run(napi_threaded_poll, n, "napi/%s-%d",
				n->dev->name, n->napi_id);
	if (!IS_ERR(n->thread))
		return 0;

	/* Failure path: report, then clear the error-encoded pointer. */
	err = PTR_ERR(n->thread);
	pr_err("kthread_run failed with err %d\n", err);
	n->thread = NULL;

	return err;
}
Between polling rounds the thread waits for work in napi_thread_wait.
Wei Wang's measurements comparing softirq-based polling with kthread-based polling:
Further reading - besides LDD3, see:
NAPI polling in kernel threads
Threadable NAPI polling, softirqs, and proper fixes
Reworking NAPI
Driver porting: Network drivers
Thanks in advance for any corrections and clarifications!