preempt_count本质上是一个per-CPU的32位变量,preempt_count代表的是该进程是否可以被抢占,peermpt_count等于0的时候当前进程就可以被抢占,当小于0存在bug,大于0说明当前进程不可以被抢占。具体每一位含义如下图所示。

preempt_count


hardirq相关

preempt_count中的第16到19个bit表示hardirq count,它记录了进入hardirq/top half的嵌套次数。irq_enter()用于标记hardirq的进入,此时hardirq count的值会加1。irq_exit()用于标记hardirq的退出,hardirq count的值会相应的减1。如果hardirq count的值为正数,说明现在正处于hardirq上下文中,代码中可借助**in_irq()**宏实现快速判断。注意这里的命名是"in_irq"而不是"in_hardirq"。

​ hardirq count占据4个bits,理论上可以表示16层嵌套,但现在Linux系统并不支持hardirq的嵌套执行,所以实际使用的只有1个bit。

#define hardirq_count()	 (preempt_count() & HARDIRQ_MASK)
#define in_irq()  (hardirq_count())

#define __irq_enter()					\
	do {						\
		account_irq_enter_time(current);	\
		preempt_count_add(HARDIRQ_OFFSET);	\
		trace_hardirq_enter();			\
	} while (0)

#define __irq_exit()					\
	do {						\
		trace_hardirq_exit();			\
		account_irq_exit_time(current);		\
		preempt_count_sub(HARDIRQ_OFFSET);	\
	} while (0)

int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq,
			bool lookup, struct pt_regs *regs)
{
	struct pt_regs *old_regs = set_irq_regs(regs);
	unsigned int irq = hwirq;
	int ret = 0;

	irq_enter(); // 进入中断hardirq++
#ifdef CONFIG_HISI_BB
	irq_trace_hook(0, 0, hwirq);
	irq_register_hook(old_regs);
#endif

#ifdef CONFIG_IRQ_DOMAIN
	if (lookup)
		irq = irq_find_mapping(domain, hwirq);
#endif
	/*
	 * Some hardware gives randomly wrong interrupts.  Rather
	 * than crashing, do something sensible.
	 */
	if (unlikely(!irq || irq >= nr_irqs)) {
		ack_bad_irq(irq);
		ret = -EINVAL;
	} else {
		generic_handle_irq(irq);
	}
#ifdef CONFIG_HISI_BB
	irq_trace_hook(1, 0, hwirq);
#endif
	irq_exit(); // 退出中断hardirq--
	set_irq_regs(old_regs);
	return ret;
}

softirq相关

preempt_count中的第8到15个bit表示softirq count,它记录了进入softirq的嵌套次数,如果softirq count的值为正数,说明现在正处于softirq上下文中。由于softirq在单个CPU上是不会嵌套执行的,因此和hardirq count一样,实际只需要一个bit(bit 8)就可以了。但这里多出的7个bits并不是因为历史原因多出来的,而是另有他用。

void irq_exit(void)
{
#ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
	local_irq_disable();
#else
	WARN_ON_ONCE(!irqs_disabled());
#endif

	account_irq_exit_time(current);
	preempt_count_sub(HARDIRQ_OFFSET);
	if (!in_interrupt() && local_softirq_pending())
		invoke_softirq(); // 退出中断前,判断当前上下文不是软中断和硬中断,才会触发软中断(同一个cpu软中断不可嵌套)

	tick_irq_exit();
	rcu_irq_exit();
	trace_hardirq_exit(); /* must be last! */
}

asmlinkage __visible void __softirq_entry __do_softirq(void)
{
	unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
	unsigned long old_flags = current->flags;
	int max_restart = MAX_SOFTIRQ_RESTART;
	struct softirq_action *h;
	bool in_hardirq;
	__u32 pending;
	int softirq_bit;

	/*
	 * Mask out PF_MEMALLOC s current task context is borrowed for the
	 * softirq. A softirq handled such as network RX might set PF_MEMALLOC
	 * again if the socket is related to swap
	 */
	current->flags &= ~PF_MEMALLOC;

	pending = local_softirq_pending();
	account_irq_enter_time(current);

	__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET); // 进入软中断前softirq++
	in_hardirq = lockdep_softirq_start();

restart:
	/* Reset the pending bitmask before enabling irqs */
	set_softirq_pending(0);

	local_irq_enable();

	h = softirq_vec;

	while ((softirq_bit = ffs(pending))) {
		unsigned int vec_nr;
		int prev_count;

		h += softirq_bit - 1;

		vec_nr = h - softirq_vec;
		prev_count = preempt_count();

		kstat_incr_softirqs_this_cpu(vec_nr);

		trace_softirq_entry(vec_nr);
		h->action(h);
		trace_softirq_exit(vec_nr);
		if (unlikely(prev_count != preempt_count())) {
			pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
			       vec_nr, softirq_to_name[vec_nr], h->action,
			       prev_count, preempt_count());
			preempt_count_set(prev_count);
		}
		h++;
		pending >>= softirq_bit;
	}

	rcu_bh_qs();
	local_irq_disable();

	pending = local_softirq_pending();
	if (pending) {
		if (time_before(jiffies, end) && !need_resched() &&
		    --max_restart)
			goto restart;

		wakeup_softirqd();
	}

	lockdep_softirq_end(in_hardirq);
	account_irq_exit_time(current);
	__local_bh_enable(SOFTIRQ_OFFSET); // 退软中断前softirq--
	WARN_ON_ONCE(in_interrupt());
	current_restore_flags(old_flags, PF_MEMALLOC);
}

​ 这个"他用"就是表示在进程上下文中,为了防止进程被softirq所抢占,关闭/禁止softirq的次数,比如每使用一次local_bh_disable(),softirq count高7个bits(bit 9到bit 15)的值就会加1,使用local_bh_enable()则会让softirq count高7个bits的的值减1。

​ 代码中可借助**in_softirq()**宏快速判断当前是否在softirq上下文:

#define softirq_count()  (preempt_count() & SOFTIRQ_MASK)
#define in_softirq()	 (softirq_count())

上下文

​ **in_interrupt()**的宏专门用来判断当前是否在中断上下文中

#define irq_count()	 (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK | NMI_MASK))			 
#define in_interrupt()  (irq_count())

与中断上下文相对应的就是俗称的进程上下文(process context)

#define in_task()  (!(preempt_count() & (HARDIRQ_MASK | SOFTIRQ_OFFSET | NMI_MASK)))

​ 在中断上下文中,调度是关闭的,不会发生进程的切换,这属于一种隐式的禁止调度,而在代码中,也可以使用preempt_disable()来显示地关闭调度,关闭次数由第0到7个bits组成的preemption count(注意不是preempt count)来记录。每使用一次preempt_disable(),preemption count的值就会加1,使用preempt_enable()则会让preemption count的值减1。preemption count占8个bits,因此一共可以表示最多256层调度关闭的嵌套。

​ 处于中断上下文,或者显示地禁止了调度,preempt_count()的值都不为0,都不允许睡眠/调度的发生,这两种场景被统称为atomic上下文,可由**in_atomic()**宏给出判断。

#define in_atomic()	(preempt_count() != 0)

​ 中断上下文、进程上下文和atomic上下文的关系大概可以表示成这样:

中断上下文、进程上下文和atomic上下文的关系


参考链接