|
---# |
|
arch/x86/kernel/process.c:119 |
|
--- |
|
static int set_new_tls(struct task_struct *p, unsigned long tls) |
|
{ |
|
struct user_desc __user *utls = (struct user_desc __user *)tls; |
|
|
|
if (in_ia32_syscall()) ## x ## |
|
return do_set_thread_area(p, -1, utls, 0); |
|
else |
|
return do_set_thread_area_64(p, ARCH_SET_FS, tls); |
|
} |
|
--- |
|
`in_ia32_syscall()` checks whether the current process is in a 32-bit syscall. If so, `do_set_thread_area()` is used to set the TLS (Thread Local Storage) descriptor. Otherwise, `do_set_thread_area_64()` sets the 64-bit FS base via `ARCH_SET_FS`.
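
The 64-bit path is the kernel side of the `arch_prctl()` interface. As a minimal userspace sketch (assuming an x86-64 Linux host), the current thread's FS base can be read back through the same machinery:

```c
#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/prctl.h>

int main(void)
{
	unsigned long fsbase = 0;

	/* ARCH_GET_FS asks the kernel for this thread's FS base */
	if (syscall(SYS_arch_prctl, ARCH_GET_FS, &fsbase) != 0) {
		perror("arch_prctl");
		return 1;
	}
	printf("FS base: %#lx\n", fsbase);
	return 0;
}
```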
|
---# |
|
arch/x86/kernel/cpu/mce/core.c:1519 |
|
--- |
|
static void mce_timer_fn(struct timer_list *t) |
|
{ |
|
struct timer_list *cpu_t = this_cpu_ptr(&mce_timer); |
|
unsigned long iv; |
|
|
|
WARN_ON(cpu_t != t); |
|
|
|
iv = __this_cpu_read(mce_next_interval); |
|
|
|
if (mce_available(this_cpu_ptr(&cpu_info))) { |
|
machine_check_poll(0, this_cpu_ptr(&mce_poll_banks)); |
|
|
|
if (mce_intel_cmci_poll()) { |
|
iv = mce_adjust_timer(iv); |
|
goto done; |
|
} |
|
} |
|
|
|
/* |
|
* Alert userspace if needed. If we logged an MCE, reduce the polling |
|
* interval, otherwise increase the polling interval. |
|
*/ |
|
if (mce_notify_irq()) |
|
iv = max(iv / 2, (unsigned long) HZ/100); ## x ## |
|
else |
|
iv = min(iv * 2, round_jiffies_relative(check_interval * HZ)); |
|
|
|
done: |
|
__this_cpu_write(mce_next_interval, iv); |
|
__start_timer(t, iv); |
|
} |
|
--- |
|
`mce_notify_irq()` checks whether an MCE (Machine Check Exception) was logged. If so, the polling interval is halved (poll more often), bounded below by HZ/100. Otherwise it is doubled, bounded above by `check_interval * HZ`.
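
The adjustment is a plain exponential back-off. A standalone sketch of the same arithmetic, where HZ and the 300-second default of `check_interval` are assumptions for illustration:

```c
#include <stdio.h>

#define HZ             1000UL
#define CHECK_INTERVAL 300UL	/* assumed default, like check_interval */

/* Halve the interval after an event, double it on quiet periods,
 * clamped between HZ/100 and CHECK_INTERVAL * HZ (in jiffies). */
static unsigned long adjust_interval(unsigned long iv, int event_seen)
{
	if (event_seen)
		return iv / 2 > HZ / 100 ? iv / 2 : HZ / 100;
	return iv * 2 < CHECK_INTERVAL * HZ ? iv * 2 : CHECK_INTERVAL * HZ;
}

int main(void)
{
	unsigned long iv = CHECK_INTERVAL * HZ;

	iv = adjust_interval(iv, 1);	/* MCE logged: poll sooner */
	printf("after event: %lu jiffies\n", iv);
	iv = adjust_interval(iv, 0);	/* quiet: back off */
	printf("after quiet: %lu jiffies\n", iv);
	return 0;
}
```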
|
---# |
|
arch/x86/kernel/hpet.c:699 |
|
--- |
|
static u64 read_hpet(struct clocksource *cs) |
|
{ |
|
unsigned long flags; |
|
union hpet_lock old, new; |
|
|
|
BUILD_BUG_ON(sizeof(union hpet_lock) != 8); |
|
|
|
/* |
|
* Read HPET directly if in NMI. |
|
*/ |
|
if (in_nmi()) |
|
return (u64)hpet_readl(HPET_COUNTER); |
|
|
|
/* |
|
* Read the current state of the lock and HPET value atomically. |
|
*/ |
|
old.lockval = READ_ONCE(hpet.lockval); ## x ## |
|
|
|
if (arch_spin_is_locked(&old.lock)) |
|
goto contended; |
|
|
|
local_irq_save(flags); |
|
if (arch_spin_trylock(&hpet.lock)) { |
|
new.value = hpet_readl(HPET_COUNTER); |
|
/* |
|
* Use WRITE_ONCE() to prevent store tearing. |
|
*/ |
|
WRITE_ONCE(hpet.value, new.value); |
|
arch_spin_unlock(&hpet.lock); |
|
local_irq_restore(flags); |
|
return (u64)new.value; |
|
} |
|
local_irq_restore(flags); |
|
|
|
contended: |
|
/* |
|
* Contended case |
|
* -------------- |
|
* Wait until the HPET value change or the lock is free to indicate |
|
* its value is up-to-date. |
|
* |
|
* It is possible that old.value has already contained the latest |
|
* HPET value while the lock holder was in the process of releasing |
|
* the lock. Checking for lock state change will enable us to return |
|
* the value immediately instead of waiting for the next HPET reader |
|
* to come along. |
|
*/ |
|
do { |
|
cpu_relax(); |
|
new.lockval = READ_ONCE(hpet.lockval); |
|
} while ((new.value == old.value) && arch_spin_is_locked(&new.lock)); |
|
|
|
return (u64)new.value; |
|
} |
|
--- |
|
`READ_ONCE()` is a macro that forces a single, untorn read of a variable, preventing the compiler from splitting, merging, or re-issuing the access. https://www.kernel.org/doc/Documentation/memory-barriers.txt If `in_nmi()` is true, the HPET counter is read directly. Otherwise the HPET value is read under `hpet.lock` using `arch_spin_trylock()`; on contention, the reader spins until the cached value changes or the lock is released.
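
Roughly, `READ_ONCE()`/`WRITE_ONCE()` behave like relaxed C11 atomic accesses: exactly one real load or store, with no tearing or fusing. A sketch of that analogy (the kernel actually implements them with `volatile` casts, not `<stdatomic.h>`):

```c
#include <inttypes.h>
#include <stdatomic.h>
#include <stdio.h>

/* READ_ONCE()/WRITE_ONCE() roughly correspond to relaxed atomic
 * accesses: the compiler must emit exactly one load/store and may
 * not tear, fuse, or re-issue it. */
static _Atomic uint64_t shared;

static uint64_t read_once(void)
{
	return atomic_load_explicit(&shared, memory_order_relaxed);
}

static void write_once(uint64_t v)
{
	atomic_store_explicit(&shared, v, memory_order_relaxed);
}

int main(void)
{
	write_once(0xdeadbeef);
	printf("%#" PRIx64 "\n", read_once());
	return 0;
}
```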
|
---# |
|
arch/x86/kernel/process.h:26 |
|
--- |
|
/* |
|
* This needs to be inline to optimize for the common case where no extra |
|
* work needs to be done. |
|
*/ |
|
static inline void switch_to_extra(struct task_struct *prev, |
|
struct task_struct *next) |
|
{ |
|
unsigned long next_tif = task_thread_info(next)->flags; |
|
unsigned long prev_tif = task_thread_info(prev)->flags; |
|
|
|
if (IS_ENABLED(CONFIG_SMP)) { |
|
/* |
|
* Avoid __switch_to_xtra() invocation when conditional |
|
* STIBP is disabled and the only different bit is |
|
* TIF_SPEC_IB. For CONFIG_SMP=n TIF_SPEC_IB is not |
|
* in the TIF_WORK_CTXSW masks. |
|
*/ |
|
if (!static_branch_likely(&switch_to_cond_stibp)) { ## x ## |
|
prev_tif &= ~_TIF_SPEC_IB; |
|
next_tif &= ~_TIF_SPEC_IB; |
|
} |
|
} |
|
|
|
/* |
|
* __switch_to_xtra() handles debug registers, i/o bitmaps, |
|
* speculation mitigations etc. |
|
*/ |
|
if (unlikely(next_tif & _TIF_WORK_CTXSW_NEXT || |
|
prev_tif & _TIF_WORK_CTXSW_PREV)) |
|
__switch_to_xtra(prev, next); |
|
} |
|
--- |
|
`static_branch_likely()` checks whether a static branch (here, conditional STIBP) is enabled. If it is not, the `TIF_SPEC_IB` (Indirect Branch Speculation) bit is cleared from both flag words, so a difference in that bit alone will not force a call to `__switch_to_xtra()`.
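
The intent of the masking is that a difference in `TIF_SPEC_IB` alone should not force the slow path when conditional STIBP is off. A small sketch of that filter, where the bit positions and the work mask are illustrative assumptions rather than the kernel's real values:

```c
#include <stdio.h>

#define TIF_SPEC_IB   (1UL << 9)	/* assumed bit position */
#define TIF_WORK_MASK (TIF_SPEC_IB | (1UL << 3))

/* If conditional STIBP is off, TIF_SPEC_IB differences alone should
 * not force the slow path, so clear the bit before testing. */
static int needs_extra_work(unsigned long prev_tif, unsigned long next_tif,
			    int cond_stibp)
{
	if (!cond_stibp) {
		prev_tif &= ~TIF_SPEC_IB;
		next_tif &= ~TIF_SPEC_IB;
	}
	return (prev_tif | next_tif) & TIF_WORK_MASK ? 1 : 0;
}

int main(void)
{
	/* Only TIF_SPEC_IB differs and conditional STIBP is disabled:
	 * the expensive path is skipped. */
	printf("%d\n", needs_extra_work(TIF_SPEC_IB, 0, 0));	/* 0 */
	printf("%d\n", needs_extra_work(TIF_SPEC_IB, 0, 1));	/* 1 */
	return 0;
}
```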
|
---# |
|
arch/x86/kernel/process_64.c:213 |
|
--- |
|
/* |
|
* Saves the FS or GS base for an outgoing thread if FSGSBASE extensions are |
|
* not available. The goal is to be reasonably fast on non-FSGSBASE systems. |
|
* It's forcibly inlined because it'll generate better code and this function |
|
* is hot. |
|
*/ |
|
static __always_inline void save_base_legacy(struct task_struct *prev_p, |
|
unsigned short selector, |
|
enum which_selector which) |
|
{ |
|
if (likely(selector == 0)) { ## x ## |
|
/* |
|
* On Intel (without X86_BUG_NULL_SEG), the segment base could |
|
* be the pre-existing saved base or it could be zero. On AMD |
|
* (with X86_BUG_NULL_SEG), the segment base could be almost |
|
* anything. |
|
* |
|
* This branch is very hot (it's hit twice on almost every |
|
* context switch between 64-bit programs), and avoiding |
|
* the RDMSR helps a lot, so we just assume that whatever |
|
* value is already saved is correct. This matches historical |
|
* Linux behavior, so it won't break existing applications. |
|
* |
|
* To avoid leaking state, on non-X86_BUG_NULL_SEG CPUs, if we |
|
* report that the base is zero, it needs to actually be zero: |
|
* see the corresponding logic in load_seg_legacy. |
|
*/ |
|
} else { |
|
/* |
|
* If the selector is 1, 2, or 3, then the base is zero on |
|
* !X86_BUG_NULL_SEG CPUs and could be anything on |
|
* X86_BUG_NULL_SEG CPUs. In the latter case, Linux |
|
* has never attempted to preserve the base across context |
|
* switches. |
|
* |
|
* If selector > 3, then it refers to a real segment, and |
|
* saving the base isn't necessary. |
|
*/ |
|
if (which == FS) |
|
prev_p->thread.fsbase = 0; |
|
else |
|
prev_p->thread.gsbase = 0; |
|
} |
|
} |
|
--- |
|
This is about saving the FS/GS base registers on context switch. If `selector` is 0 (the likely case), nothing happens: the previously saved base is assumed correct, which avoids an expensive RDMSR on this very hot path. Otherwise, either `fsbase` or `gsbase` is set to 0.
|
---# |
|
arch/x86/kernel/process_64.c:629 |
|
--- |
|
/* |
|
* switch_to(x,y) should switch tasks from x to y. |
|
* |
|
* This could still be optimized: |
|
* - fold all the options into a flag word and test it with a single test. |
|
* - could test fs/gs bitsliced |
|
* |
|
* Kprobes not supported here. Set the probe on schedule instead. |
|
* Function graph tracer not supported too. |
|
*/ |
|
__visible __notrace_funcgraph struct task_struct * |
|
__switch_to(struct task_struct *prev_p, struct task_struct *next_p) |
|
{ |
|
struct thread_struct *prev = &prev_p->thread; |
|
struct thread_struct *next = &next_p->thread; |
|
struct fpu *prev_fpu = &prev->fpu; |
|
struct fpu *next_fpu = &next->fpu; |
|
int cpu = smp_processor_id(); |
|
|
|
# ... |
|
|
|
/* |
|
* Switch the PDA and FPU contexts. |
|
*/ |
|
this_cpu_write(current_task, next_p); |
|
this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p)); |
|
|
|
switch_fpu_finish(next_fpu); |
|
|
|
/* Reload sp0. */ |
|
update_task_stack(next_p); |
|
|
|
switch_to_extra(prev_p, next_p); |
|
|
|
if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) { |
|
/* |
|
* AMD CPUs have a misfeature: SYSRET sets the SS selector but |
|
* does not update the cached descriptor. As a result, if we |
|
* do SYSRET while SS is NULL, we'll end up in user mode with |
|
* SS apparently equal to __USER_DS but actually unusable. |
|
* |
|
* The straightforward workaround would be to fix it up just |
|
* before SYSRET, but that would slow down the system call |
|
* fast paths. Instead, we ensure that SS is never NULL in |
|
* system call context. We do this by replacing NULL SS |
|
* selectors at every context switch. SYSCALL sets up a valid |
|
* SS, so the only way to get NULL is to re-enter the kernel |
|
* from CPL 3 through an interrupt. Since that can't happen |
|
* in the same task as a running syscall, we are guaranteed to |
|
* context switch between every interrupt vector entry and a |
|
* subsequent SYSRET. |
|
* |
|
* We read SS first because SS reads are much faster than |
|
* writes. Out of caution, we force SS to __KERNEL_DS even if |
|
* it previously had a different non-NULL value. |
|
*/ |
|
unsigned short ss_sel; |
|
savesegment(ss, ss_sel); |
|
if (ss_sel != __KERNEL_DS) ## x ## |
|
loadsegment(ss, __KERNEL_DS); |
|
} |
|
|
|
/* Load the Intel cache allocation PQR MSR. */ |
|
resctrl_sched_in(); |
|
|
|
return prev_p; |
|
} |
|
--- |
|
`__switch_to` is the function that switches from one task to another. The marked line is part of a workaround for a known misfeature of AMD CPUs, where SYSRET sets the SS selector without updating the cached descriptor. SS is read first because SS reads are much faster than writes; out of caution, SS is forced to `__KERNEL_DS` even if it previously had a different non-NULL value.
|
---# |
|
arch/x86/kernel/signal.c:91 |
|
--- |
|
static int restore_sigcontext(struct pt_regs *regs, |
|
struct sigcontext __user *usc, |
|
unsigned long uc_flags) |
|
{ |
|
struct sigcontext sc; |
|
|
|
/* Always make any pending restarted system calls return -EINTR */ |
|
current->restart_block.fn = do_no_restart_syscall; |
|
|
|
if (copy_from_user(&sc, usc, CONTEXT_COPY_SIZE)) ## x ## |
|
return -EFAULT; |
|
|
|
# ...
|
|
|
/* Get CS/SS and force CPL3 */ |
|
regs->cs = sc.cs | 0x03; |
|
regs->ss = sc.ss | 0x03; |
|
|
|
regs->flags = (regs->flags & ~FIX_EFLAGS) | (sc.flags & FIX_EFLAGS); |
|
/* disable syscall checks */ |
|
regs->orig_ax = -1; |
|
|
|
#ifdef CONFIG_X86_64 |
|
/* |
|
* Fix up SS if needed for the benefit of old DOSEMU and |
|
* CRIU. |
|
*/ |
|
if (unlikely(!(uc_flags & UC_STRICT_RESTORE_SS) && user_64bit_mode(regs))) |
|
force_valid_ss(regs); |
|
#endif |
|
|
|
return fpu__restore_sig((void __user *)sc.fpstate, |
|
IS_ENABLED(CONFIG_X86_32)); |
|
} |
|
--- |
|
`restore_sigcontext` restores the register state saved in a signal frame. `copy_from_user` copies `CONTEXT_COPY_SIZE` bytes of the `sigcontext` from user space into the kernel (on x86-64 this is `sizeof(struct sigcontext)`); if the copy fails, the function returns -EFAULT.
|
---# |
|
arch/x86/kernel/signal.c:469 |
|
--- |
|
static int __setup_rt_frame(int sig, struct ksignal *ksig, |
|
sigset_t *set, struct pt_regs *regs) |
|
{ |
|
struct rt_sigframe __user *frame; |
|
void __user *fp = NULL; |
|
unsigned long uc_flags; |
|
|
|
/* x86-64 should always use SA_RESTORER. */ |
|
if (!(ksig->ka.sa.sa_flags & SA_RESTORER)) |
|
return -EFAULT; |
|
|
|
frame = get_sigframe(&ksig->ka, regs, sizeof(struct rt_sigframe), &fp); |
|
uc_flags = frame_uc_flags(regs); |
|
|
|
if (!user_access_begin(frame, sizeof(*frame))) |
|
return -EFAULT; |
|
|
|
/* Create the ucontext. */ |
|
unsafe_put_user(uc_flags, &frame->uc.uc_flags, Efault); |
|
unsafe_put_user(0, &frame->uc.uc_link, Efault); |
|
unsafe_save_altstack(&frame->uc.uc_stack, regs->sp, Efault); |
|
|
|
/* Set up to return from userspace. If provided, use a stub |
|
already in userspace. */ |
|
unsafe_put_user(ksig->ka.sa.sa_restorer, &frame->pretcode, Efault); |
|
unsafe_put_sigcontext(&frame->uc.uc_mcontext, fp, regs, set, Efault); |
|
unsafe_put_sigmask(set, frame, Efault); |
|
user_access_end(); |
|
|
|
if (ksig->ka.sa.sa_flags & SA_SIGINFO) { |
|
if (copy_siginfo_to_user(&frame->info, &ksig->info)) ## x ## |
|
return -EFAULT; |
|
} |
|
|
|
/* Set up registers for signal handler */ |
|
regs->di = sig; |
|
/* In case the signal handler was declared without prototypes */ |
|
regs->ax = 0; |
|
|
|
/* This also works for non SA_SIGINFO handlers because they expect the |
|
next argument after the signal number on the stack. */ |
|
regs->si = (unsigned long)&frame->info; |
|
regs->dx = (unsigned long)&frame->uc; |
|
regs->ip = (unsigned long) ksig->ka.sa.sa_handler; |
|
|
|
regs->sp = (unsigned long)frame; |
|
|
|
/* |
|
* Set up the CS and SS registers to run signal handlers in |
|
* 64-bit mode, even if the handler happens to be interrupting |
|
* 32-bit or 16-bit code. |
|
* |
|
* SS is subtle. In 64-bit mode, we don't need any particular |
|
* SS descriptor, but we do need SS to be valid. It's possible |
|
* that the old SS is entirely bogus -- this can happen if the |
|
* signal we're trying to deliver is #GP or #SS caused by a bad |
|
* SS value. We also have a compatbility issue here: DOSEMU |
|
* relies on the contents of the SS register indicating the |
|
* SS value at the time of the signal, even though that code in |
|
* DOSEMU predates sigreturn's ability to restore SS. (DOSEMU |
|
* avoids relying on sigreturn to restore SS; instead it uses |
|
* a trampoline.) So we do our best: if the old SS was valid, |
|
* we keep it. Otherwise we replace it. |
|
*/ |
|
regs->cs = __USER_CS; |
|
|
|
if (unlikely(regs->ss != __USER_DS)) |
|
force_valid_ss(regs); |
|
|
|
return 0; |
|
|
|
Efault: |
|
user_access_end(); |
|
return -EFAULT; |
|
} |
|
--- |
|
`__setup_rt_frame` builds the signal frame on the user stack. `copy_siginfo_to_user` copies the kernel's `siginfo` into the user-space frame; if the copy fails, the function returns -EFAULT.
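
The register setup (`regs->di`, `regs->si`, `regs->dx`) is exactly what makes the three arguments of a `SA_SIGINFO` handler appear in userspace. A minimal userspace counterpart (glibc supplies the `SA_RESTORER` stub behind the scenes):

```c
#define _GNU_SOURCE
#include <signal.h>
#include <stdio.h>
#include <string.h>

/* The kernel-filled regs->di/si/dx arrive as these three arguments. */
static void handler(int sig, siginfo_t *info, void *ucontext)
{
	/* printf is not async-signal-safe; fine for a demo only */
	printf("sig=%d si_signo=%d si_code=%d uc=%p\n",
	       sig, info->si_signo, info->si_code, ucontext);
}

int main(void)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_sigaction = handler;
	sa.sa_flags = SA_SIGINFO;	/* request the 3-argument form */
	sigaction(SIGUSR1, &sa, NULL);

	raise(SIGUSR1);
	return 0;
}
```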
|
---# |
|
arch/x86/lib/insn.c:156 |
|
--- |
|
/** |
|
* insn_get_prefixes - scan x86 instruction prefix bytes |
|
* @insn: &struct insn containing instruction |
|
* |
|
* Populates the @insn->prefixes bitmap, and updates @insn->next_byte |
|
* to point to the (first) opcode. No effect if @insn->prefixes.got |
|
* is already set. |
|
*/ |
|
void insn_get_prefixes(struct insn *insn) |
|
{ |
|
struct insn_field *prefixes = &insn->prefixes; |
|
insn_attr_t attr; |
|
insn_byte_t b, lb; |
|
int i, nb; |
|
|
|
if (prefixes->got) |
|
return; |
|
|
|
# ... |
|
/* Decode REX prefix */ |
|
if (insn->x86_64) { ## x ## |
|
b = peek_next(insn_byte_t, insn); |
|
attr = inat_get_opcode_attribute(b); |
|
if (inat_is_rex_prefix(attr)) { |
|
insn->rex_prefix.value = b; |
|
insn->rex_prefix.nbytes = 1; |
|
insn->next_byte++; |
|
if (X86_REX_W(b)) |
|
/* REX.W overrides opnd_size */ |
|
insn->opnd_bytes = 8; |
|
} |
|
} |
|
insn->rex_prefix.got = 1; |
|
|
|
# ... |
|
vex_end: |
|
insn->vex_prefix.got = 1; |
|
|
|
prefixes->got = 1; |
|
|
|
err_out: |
|
return; |
|
} |
|
--- |
|
`insn_get_prefixes` scans the prefix bytes of an instruction. If `insn->x86_64` is true, it also tries to decode a REX prefix. `inat_get_opcode_attribute` returns the attribute table entry for the byte, and `inat_is_rex_prefix` checks whether that attribute marks a REX prefix. `X86_REX_W` tests the REX.W bit, which promotes the operand size to 64 bits, so `insn->opnd_bytes` is set to 8.
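
For reference, REX prefixes occupy the byte range 0x40-0x4f in 64-bit mode and REX.W is bit 3. A tiny standalone decoder for just that step:

```c
#include <stdint.h>
#include <stdio.h>

/* In 64-bit mode, bytes 0x40-0x4f are REX prefixes; bit 3 is REX.W. */
#define X86_REX_W(b) ((b) & 0x08)

static int is_rex_prefix(uint8_t b)
{
	return (b & 0xf0) == 0x40;
}

int main(void)
{
	/* 48 89 e5 = mov rbp, rsp: REX.W promotes the operand to 64 bits */
	uint8_t insn[] = { 0x48, 0x89, 0xe5 };
	int opnd_bytes = 4;	/* default operand size */

	if (is_rex_prefix(insn[0]) && X86_REX_W(insn[0]))
		opnd_bytes = 8;
	printf("operand size: %d bytes\n", opnd_bytes);
	return 0;
}
```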
|
---# |
|
arch/x86/mm/fault.c:1101 |
|
--- |
|
static inline int |
|
access_error(unsigned long error_code, struct vm_area_struct *vma) |
|
{ |
|
/* This is only called for the current mm, so: */ |
|
bool foreign = false; |
|
|
|
/* |
|
* Read or write was blocked by protection keys. This is |
|
* always an unconditional error and can never result in |
|
* a follow-up action to resolve the fault, like a COW. |
|
*/ |
|
if (error_code & X86_PF_PK) ## x ## |
|
return 1; |
|
# ... |
|
return 0; |
|
} |
|
--- |
|
`access_error` is the function that checks if the access is allowed. If the |
|
access is not allowed, `access_error` returns 1. `error_code` is the error |
|
code of the fault. `X86_PF_PK` is the bit that indicates if the access is |
|
blocked by protection keys. |
|
---# |
|
arch/x86/mm/fault.c:1121 |
|
--- |
|
static inline int |
|
access_error(unsigned long error_code, struct vm_area_struct *vma) |
|
{ |
|
/* This is only called for the current mm, so: */ |
|
bool foreign = false; |
|
# ... |
|
/* read, present: */ |
|
if (unlikely(error_code & X86_PF_PROT)) |
|
return 1; |
|
|
|
/* read, not present: */ |
|
if (unlikely(!vma_is_accessible(vma))) |
|
return 1; |
|
|
|
return 0; |
|
} |
|
--- |
|
`access_error` is the function that checks if the access is allowed; it returns 1 when it is not. `X86_PF_PROT` indicates the page was present but its protection bits forbade the access, so a read fault with this bit set is always an error. A read of a VMA that is not accessible (`!vma_is_accessible(vma)`) is likewise an error.
|
---# |
|
arch/x86/mm/fault.c:1131 |
|
--- |
|
bool fault_in_kernel_space(unsigned long address) |
|
{ |
|
/* |
|
* On 64-bit systems, the vsyscall page is at an address above |
|
* TASK_SIZE_MAX, but is not considered part of the kernel |
|
* address space. |
|
*/ |
|
if (IS_ENABLED(CONFIG_X86_64) && is_vsyscall_vaddr(address)) ## x ## |
|
return false; |
|
|
|
return address >= TASK_SIZE_MAX; |
|
} |
|
--- |
|
`fault_in_kernel_space` checks whether the faulting address lies in kernel space. `TASK_SIZE_MAX` is the upper bound of the user address space. `is_vsyscall_vaddr` checks for the legacy vsyscall page, which sits above `TASK_SIZE_MAX` but is still treated as a user-mode address, so the function returns false for it.
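
A standalone sketch of the same predicate; the constants assume x86-64 with 4-level paging and 4 KiB pages:

```c
#include <stdio.h>

#define TASK_SIZE_MAX  0x7ffffffff000UL		/* x86-64 user limit */
#define VSYSCALL_ADDR  0xffffffffff600000UL	/* legacy vsyscall page */

/* The vsyscall page sits above TASK_SIZE_MAX yet is treated as a
 * user-mode address. */
static int fault_in_kernel_space(unsigned long addr)
{
	if ((addr & ~0xfffUL) == VSYSCALL_ADDR)
		return 0;
	return addr >= TASK_SIZE_MAX;
}

int main(void)
{
	printf("%d\n", fault_in_kernel_space(VSYSCALL_ADDR + 8));	/* 0 */
	printf("%d\n", fault_in_kernel_space(0xffffffff81000000UL));	/* 1 */
	return 0;
}
```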
|
---# |
|
arch/x86/mm/fault.c:1340 |
|
--- |
|
/* Handle faults in the user portion of the address space */
|
static inline |
|
void do_user_addr_fault(struct pt_regs *regs, |
|
unsigned long hw_error_code, |
|
unsigned long address) |
|
{ |
|
struct vm_area_struct *vma; |
|
struct task_struct *tsk; |
|
struct mm_struct *mm; |
|
vm_fault_t fault; |
|
unsigned int flags = FAULT_FLAG_DEFAULT; |
|
|
|
tsk = current; |
|
mm = tsk->mm; |
|
# ... |
|
if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) { ## x ## |
|
bad_area(regs, hw_error_code, address); |
|
return; |
|
} |
|
# ... |
|
check_v8086_mode(regs, address, tsk); |
|
} |
|
--- |
|
`do_user_addr_fault` handles faults in the user portion of the address space. `VM_GROWSDOWN` marks a stack VMA that may be expanded downward. If the faulting address falls below a VMA that lacks this flag, the VMA cannot be grown to cover it, so `bad_area` is called.
|
---# |
|
arch/x86/mm/pat/memtype.c:1085 |
|
--- |
|
/* |
|
* untrack_pfn is called while unmapping a pfnmap for a region. |
|
* untrack can be called for a specific region indicated by pfn and size or |
|
* can be for the entire vma (in which case pfn, size are zero). |
|
*/ |
|
void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, |
|
unsigned long size) |
|
{ |
|
resource_size_t paddr; |
|
unsigned long prot; |
|
|
|
if (vma && !(vma->vm_flags & VM_PAT)) ## x ## |
|
return; |
|
|
|
/* free the chunk starting from pfn or the whole chunk */ |
|
paddr = (resource_size_t)pfn << PAGE_SHIFT; |
|
if (!paddr && !size) { |
|
if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) { |
|
WARN_ON_ONCE(1); |
|
return; |
|
} |
|
|
|
size = vma->vm_end - vma->vm_start; |
|
} |
|
free_pfn_range(paddr, size); |
|
if (vma) |
|
vma->vm_flags &= ~VM_PAT; |
|
} |
|
--- |
|
`untrack_pfn` untracks the PFN (page frame number) range of a pfnmap. `VM_PAT` indicates that the VMA's mapping is PAT-tracked; if it is not set, there is nothing to untrack and the function returns.
|
---# |
|
arch/x86/mm/tlb.c:559 |
|
--- |
|
void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, |
|
struct task_struct *tsk) |
|
{ |
|
struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm); |
|
u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); |
|
bool was_lazy = this_cpu_read(cpu_tlbstate.is_lazy); |
|
unsigned cpu = smp_processor_id(); |
|
u64 next_tlb_gen; |
|
bool need_flush; |
|
u16 new_asid; |
|
# ... |
|
|
|
if (need_flush) { ## x ## |
|
this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id); |
|
this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen); |
|
load_new_mm_cr3(next->pgd, new_asid, true); |
|
|
|
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); |
|
} else { |
|
/* The new ASID is already up to date. */ |
|
load_new_mm_cr3(next->pgd, new_asid, false); |
|
|
|
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0); |
|
} |
|
|
|
/* Make sure we write CR3 before loaded_mm. */ |
|
barrier(); |
|
|
|
this_cpu_write(cpu_tlbstate.loaded_mm, next); |
|
this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid); |
|
|
|
if (next != real_prev) { |
|
cr4_update_pce_mm(next); |
|
switch_ldt(real_prev, next); |
|
} |
|
} |
|
--- |
|
`switch_mm_irqs_off` switches the active mm. `need_flush` indicates whether the TLB entries cached for the new ASID are stale. If it is true, the per-ASID context metadata is updated and CR3 is loaded with a flush; otherwise CR3 is loaded without flushing.
|
---# |
|
arch/x86/mm/tlb.c:598 |
|
--- |
|
/* |
|
* Please ignore the name of this function. It should be called |
|
* switch_to_kernel_thread(). |
|
* |
|
* enter_lazy_tlb() is a hint from the scheduler that we are entering a |
|
* kernel thread or other context without an mm. Acceptable implementations |
|
* include doing nothing whatsoever, switching to init_mm, or various clever |
|
* lazy tricks to try to minimize TLB flushes. |
|
* |
|
* The scheduler reserves the right to call enter_lazy_tlb() several times |
|
* in a row. It will notify us that we're going back to a real mm by |
|
* calling switch_mm_irqs_off(). |
|
*/ |
|
void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) |
|
{ |
|
if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm) ## x ## |
|
return; |
|
|
|
this_cpu_write(cpu_tlbstate.is_lazy, true); |
|
} |
|
--- |
|
This function is called when the scheduler switches to a kernel thread or another context without an mm. If the CPU is already running `init_mm`, there is nothing to do and the function returns. Otherwise `cpu_tlbstate.is_lazy` is set to true, deferring TLB work until a real mm is switched back in.
|
---# |
|
arch/x86/mm/tlb.c:818 |
|
--- |
|
STATIC_NOPV void native_flush_tlb_others(const struct cpumask *cpumask, |
|
const struct flush_tlb_info *info) |
|
{ |
|
count_vm_tlb_event(NR_TLB_REMOTE_FLUSH); |
|
if (info->end == TLB_FLUSH_ALL) |
|
trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL); |
|
else |
|
trace_tlb_flush(TLB_REMOTE_SEND_IPI, |
|
(info->end - info->start) >> PAGE_SHIFT); |
|
|
|
/* |
|
* If no page tables were freed, we can skip sending IPIs to |
|
* CPUs in lazy TLB mode. They will flush the CPU themselves |
|
* at the next context switch. |
|
* |
|
* However, if page tables are getting freed, we need to send the |
|
* IPI everywhere, to prevent CPUs in lazy TLB mode from tripping |
|
* up on the new contents of what used to be page tables, while |
|
* doing a speculative memory access. |
|
*/ |
|
if (info->freed_tables) ## x ## |
|
smp_call_function_many(cpumask, flush_tlb_func_remote, |
|
(void *)info, 1); |
|
else |
|
on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func_remote, |
|
(void *)info, 1, cpumask); |
|
} |
|
--- |
|
`native_flush_tlb_others` flushes the TLB of other CPUs. If `info->freed_tables` is true, the flush IPI is sent to every CPU in the mask, including lazy ones, because freed page tables must not be speculatively walked. Otherwise only CPUs that are not in lazy TLB mode are sent the IPI; lazy CPUs will flush at their next context switch.
|
---# |
|
block/bio.c:225 |
|
--- |
|
struct bio_vec *bvec_alloc(gfp_t gfp_mask, int nr, unsigned long *idx, |
|
mempool_t *pool) |
|
{ |
|
struct bio_vec *bvl; |
|
# ... |
|
|
|
/* |
|
* Try a slab allocation. If this fails and __GFP_DIRECT_RECLAIM |
|
* is set, retry with the 1-entry mempool |
|
*/ |
|
bvl = kmem_cache_alloc(bvs->slab, __gfp_mask); |
|
if (unlikely(!bvl && (gfp_mask & __GFP_DIRECT_RECLAIM))) { ## x ## |
|
*idx = BVEC_POOL_MAX; |
|
goto fallback; |
|
} |
|
} |
|
|
|
(*idx)++; |
|
return bvl; |
|
} |
|
--- |
|
`bvec_alloc` allocates a bio_vec array (a bio_vec is a vector of pages). ref: http://books.gigatux.nl/mirror/kerneldevelopment/0672327201/ch13lev1sec3.html If the slab allocation fails and `__GFP_DIRECT_RECLAIM` is set, the allocation falls back to the 1-entry mempool.
|
---# |
|
block/bio.c:503
|
--- |
|
|
|
/** |
|
* bio_alloc_bioset - allocate a bio for I/O |
|
* @gfp_mask: the GFP_* mask given to the slab allocator |
|
* @nr_iovecs: number of iovecs to pre-allocate |
|
* @bs: the bio_set to allocate from. |
|
* |
|
* ... |
|
*/ |
|
|
|
struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs,

			     struct bio_set *bs)

{
|
gfp_t saved_gfp = gfp_mask; |
|
unsigned front_pad; |
|
unsigned inline_vecs; |
|
struct bio_vec *bvl = NULL; |
|
struct bio *bio; |
|
void *p; |
|
|
|
# ... |
|
|
|
if (nr_iovecs > inline_vecs) { ## x ## |
|
unsigned long idx = 0; |
|
|
|
bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, &bs->bvec_pool); |
|
if (!bvl && gfp_mask != saved_gfp) { |
|
punt_bios_to_rescuer(bs); |
|
gfp_mask = saved_gfp; |
|
bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, &bs->bvec_pool); |
|
} |
|
|
|
if (unlikely(!bvl)) |
|
goto err_free; |
|
|
|
bio->bi_flags |= idx << BVEC_POOL_OFFSET; |
|
} else if (nr_iovecs) { |
|
bvl = bio->bi_inline_vecs; |
|
} |
|
|
|
|
|
bio->bi_pool = bs; |
|
bio->bi_max_vecs = nr_iovecs; |
|
bio->bi_io_vec = bvl; |
|
return bio; |
|
|
|
err_free: |
|
mempool_free(p, &bs->bio_pool); |
|
return NULL; |
|
} |
|
--- |
|
`bio_alloc_bioset` allocates a bio. If `nr_iovecs` is greater than `inline_vecs`, a separate bio_vec array is allocated with `bvec_alloc`; if that fails and the gfp mask had been relaxed, pending bios are punted to the rescuer workqueue and the allocation is retried with the original mask. Otherwise, if `nr_iovecs` is not 0, `bvl` points at the inline vectors embedded in the bio itself.
|
---# |
|
block/bio.c:880 |
|
--- |
|
/** |
|
* __bio_try_merge_page - try appending data to an existing bvec. |
|
* @bio: destination bio |
|
* @page: start page to add |
|
* @len: length of the data to add |
|
* @off: offset of the data relative to @page |
|
* @same_page: return if the segment has been merged inside the same page |
|
* |
|
* Try to add the data at @page + @off to the last bvec of @bio. This is a |
|
* useful optimisation for file systems with a block size smaller than the |
|
* page size. |
|
* |
|
* Warn if (@len, @off) crosses pages in case that @same_page is true. |
|
* |
|
* Return %true on success or %false on failure. |
|
*/ |
|
bool __bio_try_merge_page(struct bio *bio, struct page *page, |
|
unsigned int len, unsigned int off, bool *same_page) |
|
{ |
|
if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED))) |
|
return false; |
|
|
|
if (bio->bi_vcnt > 0) { |
|
struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1]; |
|
|
|
if (page_is_mergeable(bv, page, len, off, same_page)) { |
|
if (bio->bi_iter.bi_size > UINT_MAX - len) { ## x ## |
|
*same_page = false; |
|
return false; |
|
} |
|
bv->bv_len += len; |
|
bio->bi_iter.bi_size += len; |
|
return true; |
|
} |
|
} |
|
return false; |
|
} |
|
--- |
|
`__bio_try_merge_page` tries to append data to the last bvec of a bio. If `bio->bi_vcnt` is greater than 0, the last bvec is retrieved and checked for mergeability with the page. Before merging, the size is checked for overflow: if `bio->bi_iter.bi_size` is greater than `UINT_MAX - len`, the merge is refused. Otherwise the bvec length and the bio size are each increased by `len`.
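
The `UINT_MAX - len` comparison is the standard way to test an unsigned addition for overflow without performing the wrapping add. A minimal sketch:

```c
#include <limits.h>
#include <stdio.h>

/* Check for overflow *before* adding: if size > UINT_MAX - len,
 * then size + len would wrap around. */
static int grow_size(unsigned int *size, unsigned int len)
{
	if (*size > UINT_MAX - len)
		return 0;	/* refuse the merge */
	*size += len;
	return 1;
}

int main(void)
{
	unsigned int size = UINT_MAX - 10;

	printf("%d\n", grow_size(&size, 5));	/* 1: fits */
	printf("%d\n", grow_size(&size, 100));	/* 0: would overflow */
	return 0;
}
```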
|
---# |
|
block/bio.c:918 |
|
--- |
|
/** |
|
* __bio_add_page - add page(s) to a bio in a new segment |
|
* @bio: destination bio |
|
* @page: start page to add |
|
* @len: length of the data to add, may cross pages |
|
* @off: offset of the data relative to @page, may cross pages |
|
* |
|
* Add the data at @page + @off to @bio as a new bvec. The caller must ensure |
|
* that @bio has space for another bvec. |
|
*/ |
|
void __bio_add_page(struct bio *bio, struct page *page, |
|
unsigned int len, unsigned int off) |
|
{ |
|
struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt]; |
|
|
|
WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)); |
|
WARN_ON_ONCE(bio_full(bio, len)); |
|
|
|
bv->bv_page = page; |
|
bv->bv_offset = off; |
|
bv->bv_len = len; |
|
|
|
bio->bi_iter.bi_size += len; |
|
bio->bi_vcnt++; |
|
|
|
if (!bio_flagged(bio, BIO_WORKINGSET) && unlikely(PageWorkingset(page))) ## x ## |
|
bio_set_flag(bio, BIO_WORKINGSET); |
|
} |
|
EXPORT_SYMBOL_GPL(__bio_add_page); |
|
--- |
|
`__bio_add_page` is the function that adds a page to a bio in a new segment. The |
|
caller must ensure that the bio has space for another bvec. The page is added |
|
to the bio and the size of the bio is increased by `len`. If the page is in the |
|
workingset, the bio is flagged as being in the workingset (a workingset is a |
|
set of pages that are frequently accessed). |
|
---# |
|
block/blk-core.c:832 |
|
--- |
|
static noinline_for_stack bool submit_bio_checks(struct bio *bio) |
|
{ |
|
struct request_queue *q = bio->bi_disk->queue; |
|
blk_status_t status = BLK_STS_IOERR; |
|
struct blk_plug *plug; |
|
|
|
might_sleep(); |
|
|
|
plug = blk_mq_plug(q, bio); |
|
if (plug && plug->nowait) |
|
bio->bi_opf |= REQ_NOWAIT; |
|
|
|
/* |
|
* For a REQ_NOWAIT based request, return -EOPNOTSUPP |
|
* if queue does not support NOWAIT. |
|
*/ |
|
if ((bio->bi_opf & REQ_NOWAIT) && !blk_queue_nowait(q)) |
|
goto not_supported; |
|
|
|
if (should_fail_bio(bio)) |
|
goto end_io; |
|
|
|
if (bio->bi_partno) { |
|
if (unlikely(blk_partition_remap(bio))) |
|
goto end_io; |
|
} else { |
|
if (unlikely(bio_check_ro(bio, &bio->bi_disk->part0))) ## x ## |
|
goto end_io; |
|
if (unlikely(bio_check_eod(bio, get_capacity(bio->bi_disk)))) |
|
goto end_io; |
|
} |
|
|
|
# ... |
|
|
|
not_supported: |
|
status = BLK_STS_NOTSUPP; |
|
end_io: |
|
bio->bi_status = status; |
|
bio_endio(bio); |
|
return false; |
|
} |
|
--- |
|
`submit_bio_checks` checks whether a bio may be submitted. If the bio would write to a read-only disk or partition (`bio_check_ro`), it is ended with an error status and `false` is returned.
|
---# |
|
block/blk-core.c:1269 |
|
--- |
|
static void update_io_ticks(struct hd_struct *part, unsigned long now, bool end) |
|
{ |
|
unsigned long stamp; |
|
again: |
|
stamp = READ_ONCE(part->stamp); |
|
if (unlikely(stamp != now)) { ## x ## |
|
if (likely(cmpxchg(&part->stamp, stamp, now) == stamp)) |
|
__part_stat_add(part, io_ticks, end ? now - stamp : 1); |
|
} |
|
if (part->partno) { |
|
part = &part_to_disk(part)->part0; |
|
goto again; |
|
} |
|
} |
|
--- |
|
`update_io_ticks` updates the io_ticks statistic of a partition. `part->stamp` is the jiffies timestamp of the last update. If `stamp` differs from `now`, a `cmpxchg` atomically advances `stamp` to `now`; only the CPU whose `cmpxchg` wins accounts the time, adding `now - stamp` when `end` is true and 1 otherwise.
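
The lockless update can be modeled with a C11 compare-and-swap: only the caller whose exchange wins accounts the elapsed time, so concurrent updaters never double-count. A single-threaded sketch of the arithmetic:

```c
#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned long stamp;
static unsigned long io_ticks;

/* Only the CPU whose compare-and-swap wins accounts the elapsed
 * ticks, so concurrent updaters never double-count. */
static void update_ticks(unsigned long now, int end)
{
	unsigned long old = atomic_load(&stamp);

	if (old != now &&
	    atomic_compare_exchange_strong(&stamp, &old, now))
		io_ticks += end ? now - old : 1;
}

int main(void)
{
	update_ticks(100, 0);	/* I/O started: +1 tick */
	update_ticks(150, 1);	/* I/O ended: +50 ticks */
	printf("io_ticks=%lu\n", io_ticks);
	return 0;
}
```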
|
---# |
|
block/blk-core.c:1272 |
|
--- |
|
static void update_io_ticks(struct hd_struct *part, unsigned long now, bool end) |
|
{ |
|
unsigned long stamp; |
|
again: |
|
stamp = READ_ONCE(part->stamp); |
|
if (unlikely(stamp != now)) { |
|
if (likely(cmpxchg(&part->stamp, stamp, now) == stamp)) |
|
__part_stat_add(part, io_ticks, end ? now - stamp : 1); |
|
} |
|
if (part->partno) { ## x ## |
|
part = &part_to_disk(part)->part0; |
|
goto again; |
|
} |
|
} |
|
--- |
|
`update_io_ticks` updates the io_ticks of a partition. `part->partno` is the partition number; if it is not 0, `part` is pointed at the whole disk (`part0`) and the loop runs again, so the statistic is also accounted to the disk itself.
|
---# |
|
block/blk-core.c:1775 |
|
--- |
|
/** |
|
* blk_finish_plug - mark the end of a batch of submitted I/O |
|
* @plug: The &struct blk_plug passed to blk_start_plug() |
|
* |
|
* Description: |
|
* Indicate that a batch of I/O submissions is complete. This function |
|
* must be paired with an initial call to blk_start_plug(). The intent |
|
* is to allow the block layer to optimize I/O submission. See the |
|
* documentation for blk_start_plug() for more information. |
|
*/ |
|
void blk_finish_plug(struct blk_plug *plug) |
|
{ |
|
if (plug != current->plug) ## x ## |
|
return; |
|
blk_flush_plug_list(plug, false); |
|
|
|
current->plug = NULL; |
|
} |
|
--- |
|
`blk_finish_plug` marks the end of a batch of submitted I/O. If `plug` is not the current task's plug, the function returns. Otherwise the plug list is flushed and `current->plug` is set to `NULL`.
|
---# |
|
block/blk-mq-sched.c:341 |
|
--- |
|
/* |
|
* Only SCSI implements .get_budget and .put_budget, and SCSI restarts |
|
* its queue by itself in its completion handler, so we don't need to |
|
* restart queue if .get_budget() returns BLK_STS_NO_RESOURCE. |
|
* |
|
* Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to |
|
* be run again. This is necessary to avoid starving flushes. |
|
*/ |
|
static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx) |
|
{ |
|
struct request_queue *q = hctx->queue; |
|
struct elevator_queue *e = q->elevator; |
|
bool multi_hctxs = false, run_queue = false; |
|
bool dispatched = false, busy = false; |
|
unsigned int max_dispatch; |
|
LIST_HEAD(rq_list); |
|
int count = 0; |
|
# ... |
|
if (busy) ## x ## |
|
return -EAGAIN; |
|
return !!dispatched; |
|
} |
|
--- |
|
`__blk_mq_do_dispatch_sched` dispatches requests from the I/O scheduler. If the hctx (hardware context) was found busy, -EAGAIN is returned so run_work is re-run. Otherwise it returns `!!dispatched`: 1 if any request was dispatched, 0 if not.
|
---# |
|
block/blk-mq.c:605 |
|
--- |
|
static void blk_mq_trigger_softirq(struct request *rq) |
|
{ |
|
struct list_head *list; |
|
unsigned long flags; |
|
|
|
local_irq_save(flags); |
|
list = this_cpu_ptr(&blk_cpu_done); |
|
list_add_tail(&rq->ipi_list, list); |
|
|
|
/* |
|
* If the list only contains our just added request, signal a raise of |
|
* the softirq. If there are already entries there, someone already |
|
* raised the irq but it hasn't run yet. |
|
*/ |
|
if (list->next == &rq->ipi_list) |
|
raise_softirq_irqoff(BLOCK_SOFTIRQ); ## x ## |
|
local_irq_restore(flags); |
|
} |
|
--- |
|
`blk_mq_trigger_softirq` queues a completed request on the per-CPU done list and triggers the block softirq. If `list->next` is equal to `&rq->ipi_list`, the just-added request is the only entry, so the softirq is raised. Otherwise the softirq was already raised and simply hasn't run yet.
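
The `list->next == &rq->ipi_list` test is an empty-to-non-empty transition check. A toy sketch of the same idea, using a counter in place of the list:

```c
#include <stdio.h>

static int queued;	/* entries on the per-CPU done list */

/* Signal only on the empty -> non-empty transition: if entries are
 * already queued, a previous add raised the event and it is pending. */
static void enqueue(void)
{
	if (queued++ == 0)
		printf("raise softirq\n");
}

int main(void)
{
	enqueue();	/* first entry: raises */
	enqueue();	/* already pending: no raise */
	return 0;
}
```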
|
---# |
|
block/bounce.c:377
|
--- |
|
void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig) |
|
{ |
|
mempool_t *pool; |
|
|
|
/* |
|
* Data-less bio, nothing to bounce |
|
*/ |
|
if (!bio_has_data(*bio_orig)) |
|
return; |
|
|
|
/* |
|
* for non-isa bounce case, just check if the bounce pfn is equal |
|
* to or bigger than the highest pfn in the system -- in that case, |
|
* don't waste time iterating over bio segments |
|
*/ |
|
if (!(q->bounce_gfp & GFP_DMA)) { |
|
if (q->limits.bounce_pfn >= blk_max_pfn) ## x ## |
|
return; |
|
pool = &page_pool; |
|
} else { |
|
BUG_ON(!mempool_initialized(&isa_page_pool)); |
|
pool = &isa_page_pool; |
|
} |
|
|
|
/* |
|
* slow path |
|
*/ |
|
__blk_queue_bounce(q, bio_orig, pool); |
|
} |
|
--- |
|
`blk_queue_bounce` bounces the high pages of a bio into low memory if the queue requires it. In the non-ISA case, if the queue's bounce pfn is equal to or bigger than the highest pfn in the system, no page can need bouncing and the function returns early.
|
---# |
|
fs/exec.c:449 |
|
--- |
|
/**
|
* count() counts the number of strings in array ARGV. |
|
*/ |
|
static int count(struct user_arg_ptr argv, int max) |
|
{ |
|
int i = 0; |
|
|
|
if (argv.ptr.native != NULL) { |
|
for (;;) { |
|
const char __user *p = get_user_arg_ptr(argv, i); |
|
|
|
if (!p) |
|
break; |
|
|
|
if (IS_ERR(p)) |
|
return -EFAULT; |
|
|
|
if (i >= max) |
|
return -E2BIG; |
|
++i; |
|
|
|
if (fatal_signal_pending(current)) |
|
return -ERESTARTNOHAND; |
|
cond_resched(); ## x ## |
|
} |
|
} |
|
return i; |
|
} |
|
--- |
|
`count` counts the number of strings in a user-supplied array such as argv. `p` is the user pointer at index `i` in the array. `cond_resched()` is called on each iteration so that this potentially long loop voluntarily yields the CPU.
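
A userspace analogue of the same loop over a NULL-terminated array, without the user-copy and signal checks:

```c
#include <stdio.h>

/* Count entries of a NULL-terminated pointer array, capping at max
 * (the kernel returns -E2BIG instead of -1). */
static int count(char *const argv[], int max)
{
	int i = 0;

	while (argv[i]) {
		if (i >= max)
			return -1;
		i++;
	}
	return i;
}

int main(int argc, char *argv[])
{
	(void)argc;
	printf("argc counted: %d\n", count(argv, 1024));
	return 0;
}
```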
|
---# |
|
fs/exec.c:1022 |
|
--- |
|
/* |
|
* Maps the mm_struct mm into the current task struct. |
|
* On success, this function returns with the mutex |
|
* exec_update_mutex locked. |
|
*/ |
|
static int exec_mmap(struct mm_struct *mm) |
|
{ |
|
struct task_struct *tsk; |
|
struct mm_struct *old_mm, *active_mm; |
|
int ret; |
|
# ... |
|
if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) |
|
local_irq_enable(); |
|
activate_mm(active_mm, mm); |
|
if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) |
|
local_irq_enable(); |
|
tsk->mm->vmacache_seqnum = 0; ## x ## |
|
vmacache_flush(tsk); |
|
task_unlock(tsk); |
|
if (old_mm) { |
|
mmap_read_unlock(old_mm); |
|
BUG_ON(active_mm != old_mm); |
|
setmax_mm_hiwater_rss(&tsk->signal->maxrss, old_mm); |
|
mm_update_next_owner(old_mm); |
|
mmput(old_mm); |
|
return 0; |
|
} |
|
mmdrop(active_mm); |
|
return 0; |
|
} |
|
--- |
|
`exec_mmap` installs the mm_struct `mm` into the current task. If `CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM` is enabled, interrupts stay disabled across `activate_mm` and `local_irq_enable` is called afterwards; otherwise interrupts are enabled first. The marked line then resets `vmacache_seqnum`, and `vmacache_flush` invalidates the per-task VMA cache, since any cached VMAs belong to the old mm.
|
---# |
|
fs/exec.c:1850 |
|
--- |
|
/* |
|
* sys_execve() executes a new program. |
|
*/ |
|
static int bprm_execve(struct linux_binprm *bprm, |
|
int fd, struct filename *filename, int flags) |
|
{ |
|
struct file *file; |
|
struct files_struct *displaced; |
|
int retval; |
|
|
|
/* |
|
* Cancel any io_uring activity across execve |
|
*/ |
|
io_uring_task_cancel(); |
|
|
|
retval = unshare_files(&displaced); |
|
if (retval) |
|
return retval; |
|
# ... |
|
out_files: |
|
if (displaced) |
|
reset_files_struct(displaced); ## x ## |
|
|
|
return retval; |
|
} |
|
--- |
|
`bprm_execve` executes a new program. `displaced` is the old files struct left behind by `unshare_files()`. On the error path, `reset_files_struct` is called to restore it.
|
---# |