preempt_count
本质上是一个per-CPU的32位变量,preempt_count
代表的是该进程是否可以被抢占,peermpt_count
等于0的时候当前进程就可以被抢占,当小于0存在bug,大于0说明当前进程不可以被抢占。具体每一位含义如下图所示。
hardirq相关
preempt_count
中的第16到19个bit表示hardirq count,它记录了进入hardirq/top half的嵌套次数。irq_enter()用于标记hardirq的进入,此时hardirq count的值会加1。irq_exit()用于标记hardirq的退出,hardirq count的值会相应的减1。如果hardirq count的值为正数,说明现在正处于hardirq上下文中,代码中可借助**in_irq()**宏实现快速判断。注意这里的命名是"in_irq"而不是"in_hardirq"。
hardirq count占据4个bits,理论上可以表示16层嵌套,但现在Linux系统并不支持hardirq的嵌套执行,所以实际使用的只有1个bit。
#define hardirq_count() (preempt_count() & HARDIRQ_MASK)
#define in_irq() (hardirq_count())
#define __irq_enter() \
do { \
account_irq_enter_time(current); \
preempt_count_add(HARDIRQ_OFFSET); \
trace_hardirq_enter(); \
} while (0)
#define __irq_exit() \
do { \
trace_hardirq_exit(); \
account_irq_exit_time(current); \
preempt_count_sub(HARDIRQ_OFFSET); \
} while (0)
int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq,
bool lookup, struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
unsigned int irq = hwirq;
int ret = 0;
irq_enter(); // 进入中断hardirq++
#ifdef CONFIG_HISI_BB
irq_trace_hook(0, 0, hwirq);
irq_register_hook(old_regs);
#endif
#ifdef CONFIG_IRQ_DOMAIN
if (lookup)
irq = irq_find_mapping(domain, hwirq);
#endif
/*
* Some hardware gives randomly wrong interrupts. Rather
* than crashing, do something sensible.
*/
if (unlikely(!irq || irq >= nr_irqs)) {
ack_bad_irq(irq);
ret = -EINVAL;
} else {
generic_handle_irq(irq);
}
#ifdef CONFIG_HISI_BB
irq_trace_hook(1, 0, hwirq);
#endif
irq_exit(); // 退出中断hardirq--
set_irq_regs(old_regs);
return ret;
}
softirq相关
preempt_count
中的第8到15个bit表示softirq count,它记录了进入softirq的嵌套次数,如果softirq count的值为正数,说明现在正处于softirq上下文中。由于softirq在单个CPU上是不会嵌套执行的,因此和hardirq count一样,实际只需要一个bit(bit 8)就可以了。但这里多出的7个bits并不是因为历史原因多出来的,而是另有他用。
void irq_exit(void)
{
#ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
local_irq_disable();
#else
WARN_ON_ONCE(!irqs_disabled());
#endif
account_irq_exit_time(current);
preempt_count_sub(HARDIRQ_OFFSET);
if (!in_interrupt() && local_softirq_pending())
invoke_softirq(); // 退出中断前,判断当前上下文不是软中断和硬中断,才会触发软中断(同一个cpu软中断不可嵌套)
tick_irq_exit();
rcu_irq_exit();
trace_hardirq_exit(); /* must be last! */
}
asmlinkage __visible void __softirq_entry __do_softirq(void)
{
unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
unsigned long old_flags = current->flags;
int max_restart = MAX_SOFTIRQ_RESTART;
struct softirq_action *h;
bool in_hardirq;
__u32 pending;
int softirq_bit;
/*
* Mask out PF_MEMALLOC s current task context is borrowed for the
* softirq. A softirq handled such as network RX might set PF_MEMALLOC
* again if the socket is related to swap
*/
current->flags &= ~PF_MEMALLOC;
pending = local_softirq_pending();
account_irq_enter_time(current);
__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET); // 进入软中断前softirq++
in_hardirq = lockdep_softirq_start();
restart:
/* Reset the pending bitmask before enabling irqs */
set_softirq_pending(0);
local_irq_enable();
h = softirq_vec;
while ((softirq_bit = ffs(pending))) {
unsigned int vec_nr;
int prev_count;
h += softirq_bit - 1;
vec_nr = h - softirq_vec;
prev_count = preempt_count();
kstat_incr_softirqs_this_cpu(vec_nr);
trace_softirq_entry(vec_nr);
h->action(h);
trace_softirq_exit(vec_nr);
if (unlikely(prev_count != preempt_count())) {
pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
vec_nr, softirq_to_name[vec_nr], h->action,
prev_count, preempt_count());
preempt_count_set(prev_count);
}
h++;
pending >>= softirq_bit;
}
rcu_bh_qs();
local_irq_disable();
pending = local_softirq_pending();
if (pending) {
if (time_before(jiffies, end) && !need_resched() &&
--max_restart)
goto restart;
wakeup_softirqd();
}
lockdep_softirq_end(in_hardirq);
account_irq_exit_time(current);
__local_bh_enable(SOFTIRQ_OFFSET); // 退软中断前softirq--
WARN_ON_ONCE(in_interrupt());
current_restore_flags(old_flags, PF_MEMALLOC);
}
这个"他用"就是表示在进程上下文中,为了防止进程被softirq所抢占,关闭/禁止softirq的次数,比如每使用一次local_bh_disable(),softirq count高7个bits(bit 9到bit 15)的值就会加1,使用local_bh_enable()则会让softirq count高7个bits的的值减1。
代码中可借助**in_softirq()**宏快速判断当前是否在softirq上下文:
#define softirq_count() (preempt_count() & SOFTIRQ_MASK)
#define in_softirq() (softirq_count())
上下文
**in_interrupt()**的宏专门用来判断当前是否在中断上下文中
#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK | NMI_MASK))
#define in_interrupt() (irq_count())
与中断上下文相对应的就是俗称的进程上下文(process context)
#define in_task() (!(preempt_count() & (HARDIRQ_MASK | SOFTIRQ_OFFSET | NMI_MASK)))
在中断上下文中,调度是关闭的,不会发生进程的切换,这属于一种隐式的禁止调度,而在代码中,也可以使用preempt_disable()来显示地关闭调度,关闭次数由第0到7个bits组成的preemption count(注意不是preempt count)来记录。每使用一次preempt_disable(),preemption count的值就会加1,使用preempt_enable()则会让preemption count的值减1。preemption count占8个bits,因此一共可以表示最多256层调度关闭的嵌套。
处于中断上下文,或者显示地禁止了调度,preempt_count()的值都不为0,都不允许睡眠/调度的发生,这两种场景被统称为atomic上下文,可由**in_atomic()**宏给出判断。
#define in_atomic() (preempt_count() != 0)
中断上下文、进程上下文和atomic上下文的关系大概可以表示成这样: