一、系统调用
系统调用是内核提供给用户程序的编程接口。用户程序可以使用glibc库为单个系统调用封装的函数,也可以使用通用的封装函数syscall()按系统调用号直接发起调用。以系统调用fork()为例,其在内核中的定义如下:
- /*
-  * fork system call handler, declared with SYSCALL_DEFINE0.
-  * With CONFIG_MMU the call is forwarded to _do_fork() with SIGCHLD as the
-  * first argument and zero/NULL for the rest; without CONFIG_MMU it returns
-  * -EINVAL.
-  */
- SYSCALL_DEFINE0(fork)
- {
- #ifdef CONFIG_MMU
- return _do_fork(SIGCHLD, 0, 0, NULL, NULL, 0);
- #else
- return -EINVAL;
- #endif
- }
需要在系统调用表中保存系统调用号和处理函数的映射关系,sys_call_table如下:
- /source/arch/x86/kernel/syscall_64.c
- /*
-  * System call table: one sys_call_ptr_t slot per system call number,
-  * __NR_syscall_max + 1 slots in total.
-  * The range initializer first points every slot at sys_ni_syscall. Whatever
-  * the #include below expands to is placed after that default inside the same
-  * initializer (header contents are not shown here - presumably one designated
-  * initializer per implemented system call, overriding the default; confirm).
-  */
- asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] =
- {
-
-
- [0 ... __NR_syscall_max] = &sys_ni_syscall,
- #include <asm/syscalls_64.h>
- };
二、执行系统调用
在ARM64架构中,系统调用属于同步异常。在异常级别1(EL1)的异常向量表中,64位用户程序触发的同步异常的入口是el0_sync函数。
- // Synchronous exception entry: reads the syndrome register, extracts the
- // exception class and branches to el0_svc when it equals ESR_ELx_EC_SVC64.
- el0_sync:
- kernel_entry 0 // macro not shown here; presumably saves the interrupted context - confirm
- mrs x25, esr_el1 // x25 = exception syndrome register
- lsr x24, x25, #ESR_ELx_EC_SHIFT // x24 = exception class field
- cmp x24, #ESR_ELx_EC_SVC64 // compare against the 64-bit SVC class
- b.eq el0_svc // equal: go handle the system call
- ...
el0_svc负责执行系统调用。例如,上层调用open系统调用打开文件时,el0_svc会根据系统调用号在sys_call_table中找到对应的元素,即sys_open,然后跳转执行。代码如下:
- // Register aliases used by the system call entry code that follows.
- // Fixed: ".reg" is not an assembler directive (the alias directive is ".req"),
- // and the last alias is spelled "tsk" to match how it is written at every use.
- sc_nr .req x25 // loaded with __NR_syscalls; upper bound for scno
- scno .req x26 // system call number
- stbl .req x27 // address of sys_call_table
- tsk .req x28 // filled in by get_thread_info; base for the flags load
-
- // System call dispatch: bounds-check the system call number and jump to the
- // matching sys_call_table entry, with lr set so the handler returns to
- // ret_fast_syscall.
- el0_svc:
- adrp stbl, sys_call_table // page address of sys_call_table
- // NOTE(review): adrp yields only the page base; this presumably relies on
- // sys_call_table being page-aligned - confirm.
- uxtw scno, w8 // scno = zero-extended w8 (used as the system call number below)
- mov sc_nr, #__NR_syscalls // sc_nr = number of system calls
- el0_svc_naked: // second entry label; its other users are not visible here
- stp x0, scno, [sp, #S_ORIG_X0] // store x0 and scno as a pair at sp + S_ORIG_X0
-
- enable_dbg
- enable_irq
-
- get_thread_info tsk
- ldr x16, [tsk, #TI_FLAGS] // x16 = flags word at tsk + TI_FLAGS
- tbnz x16, #TIF_SYSCALL_TRACE, __sys_trace // bit TIF_SYSCALL_TRACE set: go to __sys_trace
- adr lr, ret_fast_syscall // handler will return to ret_fast_syscall
- cmp scno, sc_nr // scno >= sc_nr (unsigned)?
- b.hs ni_sys // yes: number out of range
- ldr x16, [stbl, scno, lsl #3] // x16 = table entry at stbl + scno * 8
- br x16 // jump to the handler
- ni_sys:
- mov x0, sp // pass sp as the first argument
- b do_ni_syscall
- ENDPROC(el0_svc)
ret_fast_syscall负责从系统调用返回用户空间,代码如下:
- /*
- * This is the fast syscall return path. We do as little as possible here,
- * and this includes saving x0 back into the kernel stack.
- *
- * After storing x0, the flags word is re-read with interrupts disabled:
- * any _TIF_SYSCALL_WORK bit diverts to ret_fast_syscall_trace, any
- * _TIF_WORK_MASK bit diverts to work_pending, otherwise we fall through
- * to kernel_exit.
- */
- ret_fast_syscall:
- disable_irq // disable interrupts
- str x0, [sp, #S_X0] // returned x0
- ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for syscall tracing
- and x2, x1, #_TIF_SYSCALL_WORK
- cbnz x2, ret_fast_syscall_trace // some _TIF_SYSCALL_WORK bit is set
- and x2, x1, #_TIF_WORK_MASK
- cbnz x2, work_pending // some _TIF_WORK_MASK bit is set; x1 still holds the flags
- enable_step_tsk x1, x2
- kernel_exit 0 // macro not shown here; presumably restores the saved context and returns to user space - confirm
- ret_fast_syscall_trace:
- enable_irq // enable interrupts
- b __sys_trace_return_skipped // we already saved x0
-
- /*
- * Ok, we need to do extra processing, enter the slow path.
- *
- * Both code paths that branch here have just loaded the flags word into x1,
- * and x0 is set to sp below, so do_notify_resume is entered with x0 = sp and
- * x1 = flags.
- */
- work_pending:
- mov x0, sp // 'regs'
- bl do_notify_resume
-
- bl trace_hardirqs_on // enabled while in userspace
-
- ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for single-step
- b finish_ret_to_user // rejoin the exit sequence with the fresh flags in x1
- /*
- * "slow" syscall return path.
- *
- * Reads the flags word with interrupts disabled and diverts to work_pending
- * while any _TIF_WORK_MASK bit is set; otherwise falls through to the exit
- * sequence at finish_ret_to_user.
- */
- ret_to_user:
- disable_irq // disable interrupts
- ldr x1, [tsk, #TSK_TI_FLAGS] // x1 = flags word
- and x2, x1, #_TIF_WORK_MASK
- cbnz x2, work_pending // some _TIF_WORK_MASK bit is set
- finish_ret_to_user: // also the target of the branch at the end of work_pending
- enable_step_tsk x1, x2
- kernel_exit 0 // macro not shown here; presumably restores the saved context and returns to user space - confirm
- ENDPROC(ret_to_user)
work_pending调用do_notify_resume函数,代码如下:
- /*
-  * Process the work indicated by thread_flags.
-  *
-  * regs:         passed through to the uprobe, signal and tracehook handlers.
-  * thread_flags: flags word supplied by the caller; re-read from
-  *               current_thread_info()->flags after every pass.
-  *
-  * Loops until no _TIF_WORK_MASK bit remains set. Each pass either calls
-  * schedule() (when _TIF_NEED_RESCHED is set) or enables interrupts and
-  * services the other flags; interrupts are disabled again before the flags
-  * are re-read, so the function returns with local_irq_disable() in effect.
-  */
- asmlinkage void do_notify_resume(struct pt_regs *regs,
- unsigned int thread_flags)
- {
- /*
- * The assembly code enters us with IRQs off, but it hasn't
- * informed the tracing code of that for efficiency reasons.
- * Update the trace code with the current status.
- */
- trace_hardirqs_off();
- do {
- /* Rescheduling is checked first; this branch does not call local_irq_enable(). */
- if (thread_flags & _TIF_NEED_RESCHED) {
- schedule();
- } else {
- local_irq_enable();
-
- if (thread_flags & _TIF_UPROBE)
- uprobe_notify_resume(regs);
-
- if (thread_flags & _TIF_SIGPENDING)
- do_signal(regs);
-
- if (thread_flags & _TIF_NOTIFY_RESUME) {
- /* The flag is cleared before the hook is called. */
- clear_thread_flag(TIF_NOTIFY_RESUME);
- tracehook_notify_resume(regs);
- }
-
- if (thread_flags & _TIF_FOREIGN_FPSTATE)
- fpsimd_restore_current_state();
- }
-
- /* Disable interrupts before sampling the flags for the loop condition. */
- local_irq_disable();
- thread_flags = READ_ONCE(current_thread_info()->flags);
- } while (thread_flags & _TIF_WORK_MASK);
- }