跳转至

第四篇:SoftIRQ 与 Tasklet — 下半部中断处理机制

源码:kernel/softirq.c | 头文件:include/linux/interrupt.h include/linux/preempt.h

系列目录:IRQ 内核源码深度解析


1. 为什么需要下半部

硬中断处理禁止睡眠且关中断,必须快速返回。耗时工作通过下半部延迟执行:

机制 上下文 并发性 动态注册 开销
SoftIRQ 软中断 同类型多 CPU 并发 极低
Tasklet 软中断 同一 tasklet 串行
Workqueue 进程 可睡眠 中等

2. SoftIRQ 类型枚举 (interrupt.h:550-563)

10 种,按优先级从高到低:

// include/linux/interrupt.h:550
enum {
    HI_SOFTIRQ=0,    TIMER_SOFTIRQ,   NET_TX_SOFTIRQ,  NET_RX_SOFTIRQ,
    BLOCK_SOFTIRQ,   IRQ_POLL_SOFTIRQ,TASKLET_SOFTIRQ, SCHED_SOFTIRQ,
    HRTIMER_SOFTIRQ, RCU_SOFTIRQ,     /* RCU should always be the last */
    NR_SOFTIRQS     // = 10
};
// kernel/softirq.c:60
static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;

// kernel/softirq.c:64
const char * const softirq_to_name[NR_SOFTIRQS] = {
    "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "IRQ_POLL",
    "TASKLET", "SCHED", "HRTIMER", "RCU"
};

3. 核心数据结构

3.1 struct softirq_action (interrupt.h:590)

// include/linux/interrupt.h:590
struct softirq_action {
    void (*action)(void);
};

3.2 struct tasklet_struct (interrupt.h:691)

// include/linux/interrupt.h:691
struct tasklet_struct {
    struct tasklet_struct *next;   // 链表指针
    unsigned long state;           // TASKLET_STATE_SCHED / TASKLET_STATE_RUN
    atomic_t count;                // 0=启用, 非0=禁用
    bool use_callback;
    union {
        void (*func)(unsigned long data);         // 旧接口
        void (*callback)(struct tasklet_struct *t); // 新接口
    };
    unsigned long data;
};

// include/linux/interrupt.h:733
enum { TASKLET_STATE_SCHED, TASKLET_STATE_RUN };

// include/linux/interrupt.h:704, 721 — DECLARE_TASKLET / DECLARE_TASKLET_OLD

3.3 tasklet_head (softirq.c:814)

// kernel/softirq.c:814
struct tasklet_head {
    struct tasklet_struct *head;
    struct tasklet_struct **tail;    // O(1) 追加: *tail = t; tail = &t->next;
};
// kernel/softirq.c:819
static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);

4. SoftIRQ 的注册与触发

4.1 open_softirq (softirq.c:806)

// kernel/softirq.c:806
void open_softirq(int nr, void (*action)(void)) {
    softirq_vec[nr].action = action;
}

典型调用:open_softirq(TASKLET_SOFTIRQ, tasklet_action) 等,由 softirq_init 和各子系统注册。

4.2 pending 位图 (interrupt.h:528-530)

// include/linux/interrupt.h:528-530
#define local_softirq_pending()   (__this_cpu_read(local_softirq_pending_ref))
#define set_softirq_pending(x)    (__this_cpu_write(local_softirq_pending_ref, (x)))
#define or_softirq_pending(x)     (__this_cpu_or(local_softirq_pending_ref, (x)))

local_softirq_pending_ref 映射到 per-CPU irq_stat.__softirq_pending

__softirq_pending bit 布局 (32 bits):
┌───┬───┬───┬───┬───┬───┬───┬───┬───┬───┬────────────────────┐
│RCU│HRT│SCH│TSK│IRQ│BLK│NRX│NTX│TMR│ HI│         0           │
│ 9 │ 8 │ 7 │ 6 │ 5 │ 4 │ 3 │ 2 │ 1 │ 0 │                     │
└───┴───┴───┴───┴───┴───┴───┴───┴───┴───┴────────────────────┘

4.3 raise_softirq / __raise_softirq_irqoff (softirq.c:790, 799)

// kernel/softirq.c:790
void raise_softirq(unsigned int nr) {
    unsigned long flags;
    local_irq_save(flags);
    raise_softirq_irqoff(nr);
    local_irq_restore(flags);
}

// kernel/softirq.c:799
void __raise_softirq_irqoff(unsigned int nr) {
    lockdep_assert_irqs_disabled();
    trace_softirq_raise(nr);
    or_softirq_pending(1UL << nr);   // 设置 pending bit
}

4.4 wakeup_softirqd (softirq.c:75)

// kernel/softirq.c:75
static void wakeup_softirqd(void) {
    struct task_struct *tsk = __this_cpu_read(ksoftirqd);
    if (tsk) wake_up_process(tsk);
}

5. do_softirq (softirq.c:510)

// kernel/softirq.c:510
asmlinkage __visible void do_softirq(void) {
    __u32 pending;
    unsigned long flags;

    if (in_interrupt())          // 防止重入
        return;

    local_irq_save(flags);
    pending = local_softirq_pending();
    if (pending)
        do_softirq_own_stack();  // 切栈调用 __do_softirq
    local_irq_restore(flags);
}

6. handle_softirqs — 核心执行 (softirq.c:579)

// kernel/softirq.c:543-544
#define MAX_SOFTIRQ_TIME  msecs_to_jiffies(2)
#define MAX_SOFTIRQ_RESTART 10

// kernel/softirq.c:579
static void handle_softirqs(bool ksirqd) {
    unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
    unsigned long old_flags = current->flags;
    int max_restart = MAX_SOFTIRQ_RESTART;
    struct softirq_action *h;
    bool in_hardirq;
    __u32 pending;
    int softirq_bit;

    current->flags &= ~PF_MEMALLOC;
    pending = local_softirq_pending();

    softirq_handle_begin();
    in_hardirq = lockdep_softirq_start();
    account_softirq_enter(current);

restart:
    set_softirq_pending(0);       // ★ 先清零 pending,再开中断
    local_irq_enable();           // ★ 此时可被硬件中断抢占

    h = softirq_vec;
    while ((softirq_bit = ffs(pending))) {  // ffs 从低到高找第一个 1
        unsigned int vec_nr;
        int prev_count;

        h += softirq_bit - 1;
        vec_nr = h - softirq_vec;
        prev_count = preempt_count();

        kstat_incr_softirqs_this_cpu(vec_nr);
        trace_softirq_entry(vec_nr);
        h->action();              // ★ 执行 handler
        trace_softirq_exit(vec_nr);

        if (unlikely(prev_count != preempt_count())) {
            pr_err("huh, entered softirq %u %s %p with "
                   "preempt_count %08x, exited with %08x?\n",
                   vec_nr, softirq_to_name[vec_nr], h->action,
                   prev_count, preempt_count());
            preempt_count_set(prev_count);
        }
        h++;
        pending >>= softirq_bit;
    }

    if (!IS_ENABLED(CONFIG_PREEMPT_RT) && ksirqd)
        rcu_softirq_qs();

    local_irq_disable();
    pending = local_softirq_pending();

    if (pending) {
        if (time_before(jiffies, end) && !need_resched() && --max_restart)
            goto restart;          // 还有 pending 且未超限 → 重试
        wakeup_softirqd();         // 超限 → 交给 ksoftirqd
    }

    account_softirq_exit(current);
    lockdep_softirq_end(in_hardirq);
    softirq_handle_end();
    current_restore_flags(old_flags, PF_MEMALLOC);
}

6.1 关键设计:先清零再开中断

正确:  set_pending(0) → local_irq_enable → action
       开中断后新 IRQ 进来 → raise_softirq → 设 pending bit
       → 下一轮 restart 会处理新 pending ✅

错误:  local_irq_enable → action → set_pending(0)
       新 IRQ 设置 pending bit → 当前轮结束清零 → 新 softirq 丢失 ❌

6.2 防止饥饿

两个停止条件确保软中断不能无限运行:时间 > 2ms 或重试 > 10 次 → 唤醒 ksoftirqd。


7. ksoftirqd — 守护线程

// kernel/softirq.c:62
DEFINE_PER_CPU(struct task_struct *, ksoftirqd);

// kernel/softirq.c:1063
static int ksoftirqd_should_run(unsigned int cpu) {
    return local_softirq_pending();   // 有 pending 就唤醒
}

// kernel/softirq.c:1068
static void run_ksoftirqd(unsigned int cpu) {
    ksoftirqd_run_begin();
    if (local_softirq_pending()) {
        handle_softirqs(true);    // ksirqd=true
        ksoftirqd_run_end();
        cond_resched();
        return;
    }
    ksoftirqd_run_end();
}

每个 CPU 一个 ksoftirqd/N 线程,SCHED_OTHER 策略,不抢占实时任务。


8. irq_exit → 下半部入口

do_IRQ() → handle_irq_event → irq_exit():
    if (!in_interrupt() && local_softirq_pending()) {
#ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
        __do_softirq();          // 中断栈几乎为空 → 直接执行
#else
        do_softirq_own_stack();  // 任务栈可能很深 → 切栈
#endif
    } else {
        wakeup_softirqd();       // 已在中斷上下文 → 委托
    }

9. Tasklet:建立在 TASKLET_SOFTIRQ 之上

9.1 调度 (interrupt.h:758)

// include/linux/interrupt.h:758
static inline void tasklet_schedule(struct tasklet_struct *t) {
    if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
        __tasklet_schedule(t);   // 防重入: 同一 tasklet 只调度一次
}

9.2 执行 (softirq.c:916)

// kernel/softirq.c:916
static void tasklet_action_common(struct tasklet_head *tl_head,
                                  unsigned int softirq_nr) {
    struct tasklet_struct *list;

    local_irq_disable();
    list = tl_head->head;
    tl_head->head = NULL;              // ★ 原子摘取整条链表
    tl_head->tail = &tl_head->head;
    local_irq_enable();

    tasklet_lock_callback();
    while (list) {
        struct tasklet_struct *t = list;
        list = list->next;

        if (tasklet_trylock(t)) {           // ★ test_and_set RUN → 串行
            if (!atomic_read(&t->count)) {   // count==0 → 已启用
                if (tasklet_clear_sched(t)) {
                    if (t->use_callback)
                        t->callback(t);      // 新接口
                    else
                        t->func(t->data);    // 旧接口
                }
                tasklet_unlock(t);
                tasklet_callback_sync_wait_running();
                continue;
            }
            tasklet_unlock(t);
        }
        // trylock 失败 (正被其他 CPU 执行) → 重新挂回链表
        local_irq_disable();
        t->next = NULL;
        *tl_head->tail = t;
        tl_head->tail = &t->next;
        __raise_softirq_irqoff(softirq_nr);    // 重新触发
        local_irq_enable();
    }
    tasklet_unlock_callback();
}

9.3 状态机

              tasklet_schedule()
  [IDLE] ──────────────────────────► [SCHED]
    ▲                                    │
    │                softirq 触发         │
    │                tasklet_trylock()    │
    │                         ┌──────────┘
    │                         ▼
    │                    [RUNNING] (TASKLET_STATE_RUN set)
    │                         │
    │           ┌─────────────┤
    │           │ 执行完成     │ 被其他 CPU 重新调度
    │           ▼             ▼
    │     tasklet_unlock → [IDLE] 或 → [SCHED] (requeue + raise_softirq)
    └── trylock 失败 → requeue 回链表 → 重新 raise TASKLET_SOFTIRQ

10. 上下文判断宏 (preempt.h:127-141)

// include/linux/preempt.h:127
#define in_hardirq()        (hardirq_count())

// include/linux/preempt.h:130
#ifdef CONFIG_PREEMPT_RT
# define in_task()          (!((preempt_count() & (NMI_MASK | HARDIRQ_MASK)) |
                               in_serving_softirq()))
#else
# define in_task()          (!(preempt_count() & (NMI_MASK | HARDIRQ_MASK |
                                                  SOFTIRQ_OFFSET)))
#endif

// include/linux/preempt.h:140 (deprecated)
#define in_softirq()        (softirq_count())
// include/linux/preempt.h:141 (deprecated)
#define in_interrupt()      (irq_count())

速查表:

┌────────────────────┬──────────┬───────────┬──────────┬──────────┐
│ 执行环境            │ in_task  │ in_hardirq│ in_softirq│in_interrupt│
├────────────────────┼──────────┼───────────┼──────────┼──────────┤
│ 进程上下文          │  true    │   false   │  false   │  false   │
│ 硬中断 ISR          │  false   │   true    │  false   │  true    │
│ 软中断 handler      │  false   │   false   │  true    │  true    │
│ BH 禁用区           │  false   │   false   │  true    │  true    │
└────────────────────┴──────────┴───────────┴──────────┴──────────┘

11. local_bh_enable/disable

local_bh_disable()
   __local_bh_disable_ip()  // 递增 preempt_count SOFTIRQ 部分, 防同 CPU 软中断重入

local_bh_enable()
   __local_bh_enable_ip()
     递减计数; if (降到0 && local_softirq_pending())
         __do_softirq()  // 或 wakeup_softirqd

注意:local_bh_disable 只保护同一 CPU,不防其他 CPU 的 softirq。跨 CPU 保护需要 spin_lock。


12. 完整流程图

        硬件设备
    ┌─────────────────┐
    │ arch_entry       │ x86: common_interrupt
    │ → handle_irq_desc│ arm64: el1_irq
    └────────┬────────┘
    ┌─────────────────┐
    │ handle_irq_event │ 上半部 ISR: raise_softirq_irqoff() 设 pending
    └────────┬────────┘
    ┌─────────────────┐
    │ irq_exit()       │
    │ → invoke_softirq │
    └───┬────┬─────────┘
        │    │
  in_interrupt│ !in_interrupt && pending
        │    │
        ▼    ▼
    [返回] ┌──────────────────────┐
           │ handle_softirqs()    │ softirq.c:579
           │                      │
           │ set_pending(0)       │ 先清零
           │ local_irq_enable     │ 开中断
           │                      │
           │ while ffs(pending):  │ 按优先级遍历
           │  HI → TIMER →        │
           │  NET_TX → NET_RX →   │
           │  BLOCK → IRQ_POLL →  │
           │  TASKLET → SCHED →   │
           │  HRTIMER → RCU       │
           │                      │
           │ still pending?       │
           │  ├─ yes (<max): goto │
           │  │         restart   │
           │  └─ no/超限:         │
           │     wakeup_softirqd  │
           └──────────────────────┘
           ┌──────────────────┐
           │ ksoftirqd/N       │ 进程上下文继续处理
           └──────────────────┘

TASKLET_SOFTIRQ 触发时:
    ┌───────────────────────┐
    │ tasklet_action_common │ softirq.c:916
    │   摘取 per-CPU 链表    │
    │   遍历: trylock       │
    │     count==0 → execute│
    │     trylock失败→requeue│
    └───────────────────────┘

13. 调试与选择

cat /proc/softirqs                    # SoftIRQ per-CPU 统计
ps aux | grep ksoftirqd               # 守护线程
echo 1 > /sys/kernel/debug/tracing/events/irq/softirq_entry/enable
cat /sys/kernel/debug/tracing/trace_pipe  # tracepoint
watch -n1 cat /proc/softirqs          # 检测饥饿

选择决策:要睡眠→Workqueue; 不要睡眠+需多CPU并发+极致延迟→SoftIRQ; 其余→Tasklet.


下一篇文章

第五篇:CPU Hotplug、MSI 与电源管理 — IRQ 的跨 CPU 迁移与睡眠唤醒


💬 评论