xref: /DragonOS/kernel/src/arch/x86_64/process/mod.rs (revision 971462be94ba0a5c74af7a5f9653dfabd4932a63)
1 use core::{
2     arch::asm,
3     intrinsics::unlikely,
4     mem::ManuallyDrop,
5     sync::atomic::{compiler_fence, Ordering},
6 };
7 
8 use alloc::{
9     string::String,
10     sync::{Arc, Weak},
11     vec::Vec,
12 };
13 
14 use memoffset::offset_of;
15 use x86::{controlregs::Cr4, segmentation::SegmentSelector};
16 
17 use crate::{
18     arch::process::table::TSSManager,
19     exception::InterruptArch,
20     kwarn,
21     libs::spinlock::SpinLockGuard,
22     mm::{
23         percpu::{PerCpu, PerCpuVar},
24         VirtAddr,
25     },
26     process::{
27         fork::{CloneFlags, KernelCloneArgs},
28         KernelStack, ProcessControlBlock, ProcessFlags, ProcessManager, SwitchResult,
29         SWITCH_RESULT,
30     },
31     syscall::{Syscall, SystemError},
32 };
33 
34 use self::{
35     kthread::kernel_thread_bootstrap_stage1,
36     syscall::ARCH_SET_FS,
37     table::{switch_fs_and_gs, KERNEL_DS, USER_DS},
38 };
39 
40 use super::{fpu::FpState, interrupt::TrapFrame, CurrentIrqArch};
41 
42 mod c_adapter;
43 pub mod kthread;
44 pub mod syscall;
45 pub mod table;
46 
/// MSR number used with `rdmsr`/`wrmsr` to read or write the FS base address when the
/// `FSGSBASE` instructions are not enabled in CR4.
pub const IA32_FS_BASE: u32 = 0xC000_0100;
/// MSR number used with `rdmsr`/`wrmsr` to read or write the GS base address when the
/// `FSGSBASE` instructions are not enabled in CR4.
pub const IA32_GS_BASE: u32 = 0xC000_0101;
49 
extern "C" {
    /// Returns from an interrupt.
    ///
    /// Its address is used as the first instruction pointer of non-kthread children in
    /// `copy_thread` and as the return target in `arch_switch_to_user`.
    /// NOTE(review): presumably implemented in assembly outside this file — confirm.
    fn ret_from_intr();
}
54 
/// Backing storage for the statically allocated idle-process kernel stack.
///
/// The type is 32768 bytes large and aligned to 32768 bytes.
#[allow(dead_code)]
#[repr(align(32768))]
union InitProcUnion {
    /// Holds the kernel stack of the idle process.
    idle_stack: [u8; 32768],
}
61 
/// Zero-initialised idle stack storage, placed in the `.data.init_proc_union` link section
/// and exported under its unmangled name.
/// NOTE(review): judging by the name this is the idle stack of the bootstrap processor;
/// the consumer of the symbol is not visible in this file — confirm.
#[link_section = ".data.init_proc_union"]
#[no_mangle]
static BSP_IDLE_STACK_SPACE: InitProcUnion = InitProcUnion {
    idle_stack: [0; 32768],
};
67 
/// Architecture-specific information stored in a PCB.
///
/// The register fields are saved and reloaded by `switch_to_inner` (through `offset_of!`
/// offsets) when switching between processes.
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct ArchPCBInfo {
    /// Saved RFLAGS.
    rflags: usize,
    /// Saved callee-preserved register `rbx`.
    rbx: usize,
    /// Saved callee-preserved register `r12`.
    r12: usize,
    /// Saved callee-preserved register `r13`.
    r13: usize,
    /// Saved callee-preserved register `r14`.
    r14: usize,
    /// Saved callee-preserved register `r15`.
    r15: usize,
    /// Saved frame/base pointer.
    rbp: usize,
    /// Saved stack pointer.
    rsp: usize,
    /// Address execution continues at when this process is switched to.
    rip: usize,
    /// Exposed through `cr2_mut`; not otherwise used in this file.
    cr2: usize,
    /// Saved FS base address (see `save_fsbase` / `restore_fsbase`).
    fsbase: usize,
    /// Saved GS base address (see `save_gsbase` / `restore_gsbase`).
    gsbase: usize,
    /// Saved `fs` segment selector bits.
    fs: u16,
    /// Saved `gs` segment selector bits.
    gs: u16,

    /// State of the floating-point registers; `None` until it is first saved.
    fp_state: Option<FpState>,
}
90 
91 #[allow(dead_code)]
92 impl ArchPCBInfo {
93     /// 创建一个新的ArchPCBInfo
94     ///
95     /// ## 参数
96     ///
97     /// - `kstack`:内核栈的引用,如果为None,则不会设置rsp和rbp。如果为Some,则会设置rsp和rbp为内核栈的最高地址。
98     ///
99     /// ## 返回值
100     ///
101     /// 返回一个新的ArchPCBInfo
102     pub fn new(kstack: Option<&KernelStack>) -> Self {
103         let mut r = Self {
104             rflags: 0,
105             rbx: 0,
106             r12: 0,
107             r13: 0,
108             r14: 0,
109             r15: 0,
110             rbp: 0,
111             rsp: 0,
112             rip: 0,
113             cr2: 0,
114             fsbase: 0,
115             gsbase: 0,
116             fs: KERNEL_DS.bits(),
117             gs: KERNEL_DS.bits(),
118             fp_state: None,
119         };
120 
121         if kstack.is_some() {
122             let kstack = kstack.unwrap();
123             r.rsp = kstack.stack_max_address().data();
124             r.rbp = kstack.stack_max_address().data();
125         }
126 
127         return r;
128     }
129 
130     pub fn set_stack(&mut self, stack: VirtAddr) {
131         self.rsp = stack.data();
132     }
133 
134     pub fn set_stack_base(&mut self, stack_base: VirtAddr) {
135         self.rbp = stack_base.data();
136     }
137 
138     pub fn rbp(&self) -> usize {
139         self.rbp
140     }
141 
142     pub unsafe fn push_to_stack(&mut self, value: usize) {
143         self.rsp -= core::mem::size_of::<usize>();
144         *(self.rsp as *mut usize) = value;
145     }
146 
147     pub unsafe fn pop_from_stack(&mut self) -> usize {
148         let value = *(self.rsp as *const usize);
149         self.rsp += core::mem::size_of::<usize>();
150         value
151     }
152 
153     pub fn save_fp_state(&mut self) {
154         if self.fp_state.is_none() {
155             self.fp_state = Some(FpState::new());
156         }
157 
158         self.fp_state.as_mut().unwrap().save();
159     }
160 
161     pub fn restore_fp_state(&mut self) {
162         if unlikely(self.fp_state.is_none()) {
163             return;
164         }
165 
166         self.fp_state.as_mut().unwrap().restore();
167     }
168 
169     /// 返回浮点寄存器结构体的副本
170     pub fn fp_state(&self) -> &Option<FpState> {
171         &self.fp_state
172     }
173 
174     // 清空浮点寄存器
175     pub fn clear_fp_state(&mut self) {
176         if unlikely(self.fp_state.is_none()) {
177             kwarn!("fp_state is none");
178             return;
179         }
180 
181         self.fp_state.as_mut().unwrap().clear();
182     }
183     pub unsafe fn save_fsbase(&mut self) {
184         if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) {
185             self.fsbase = x86::current::segmentation::rdfsbase() as usize;
186         } else {
187             self.fsbase = x86::msr::rdmsr(IA32_FS_BASE) as usize;
188         }
189     }
190 
191     pub unsafe fn save_gsbase(&mut self) {
192         if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) {
193             self.gsbase = x86::current::segmentation::rdgsbase() as usize;
194         } else {
195             self.gsbase = x86::msr::rdmsr(IA32_GS_BASE) as usize;
196         }
197     }
198 
199     pub unsafe fn restore_fsbase(&mut self) {
200         if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) {
201             x86::current::segmentation::wrfsbase(self.fsbase as u64);
202         } else {
203             x86::msr::wrmsr(IA32_FS_BASE, self.fsbase as u64);
204         }
205     }
206 
207     pub unsafe fn restore_gsbase(&mut self) {
208         if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) {
209             x86::current::segmentation::wrgsbase(self.gsbase as u64);
210         } else {
211             x86::msr::wrmsr(IA32_GS_BASE, self.gsbase as u64);
212         }
213     }
214 
215     pub fn fsbase(&self) -> usize {
216         self.fsbase
217     }
218 
219     pub fn gsbase(&self) -> usize {
220         self.gsbase
221     }
222 
223     pub fn cr2_mut(&mut self) -> &mut usize {
224         &mut self.cr2
225     }
226 
227     pub fn fp_state_mut(&mut self) -> &mut Option<FpState> {
228         &mut self.fp_state
229     }
230 }
231 
232 impl ProcessControlBlock {
233     /// 获取当前进程的pcb
234     pub fn arch_current_pcb() -> Arc<Self> {
235         // 获取栈指针
236         let ptr = VirtAddr::new(x86::current::registers::rsp() as usize);
237         let stack_base = VirtAddr::new(ptr.data() & (!(KernelStack::ALIGN - 1)));
238         // 从内核栈的最低地址处取出pcb的地址
239         let p = stack_base.data() as *const *const ProcessControlBlock;
240         if unlikely((unsafe { *p }).is_null()) {
241             panic!("current_pcb is null");
242         }
243         unsafe {
244             // 为了防止内核栈的pcb weak 指针被释放,这里需要将其包装一下
245             let weak_wrapper: ManuallyDrop<Weak<ProcessControlBlock>> =
246                 ManuallyDrop::new(Weak::from_raw(*p));
247 
248             let new_arc: Arc<ProcessControlBlock> = weak_wrapper.upgrade().unwrap();
249             return new_arc;
250         }
251     }
252 }
253 
254 impl ProcessManager {
255     pub fn arch_init() {
256         {
257             // 初始化进程切换结果 per cpu变量
258             let mut switch_res_vec: Vec<SwitchResult> = Vec::new();
259             for _ in 0..PerCpu::MAX_CPU_NUM {
260                 switch_res_vec.push(SwitchResult::new());
261             }
262             unsafe {
263                 SWITCH_RESULT = Some(PerCpuVar::new(switch_res_vec).unwrap());
264             }
265         }
266     }
267     /// fork的过程中复制线程
268     ///
269     /// 由于这个过程与具体的架构相关,所以放在这里
270     pub fn copy_thread(
271         current_pcb: &Arc<ProcessControlBlock>,
272         new_pcb: &Arc<ProcessControlBlock>,
273         clone_args: KernelCloneArgs,
274         current_trapframe: &TrapFrame,
275     ) -> Result<(), SystemError> {
276         let clone_flags = clone_args.flags;
277         let mut child_trapframe = current_trapframe.clone();
278 
279         // 子进程的返回值为0
280         child_trapframe.set_return_value(0);
281 
282         // 设置子进程的栈基址(开始执行中断返回流程时的栈基址)
283         let mut new_arch_guard = new_pcb.arch_info();
284         let kernel_stack_guard = new_pcb.kernel_stack();
285 
286         // 设置子进程在内核态开始执行时的rsp、rbp
287         new_arch_guard.set_stack_base(kernel_stack_guard.stack_max_address());
288 
289         let trap_frame_vaddr: VirtAddr =
290             kernel_stack_guard.stack_max_address() - core::mem::size_of::<TrapFrame>();
291         new_arch_guard.set_stack(trap_frame_vaddr);
292 
293         // 拷贝栈帧
294         unsafe {
295             let usp = clone_args.stack;
296             if usp != 0 {
297                 child_trapframe.rsp = usp as u64;
298             }
299             let trap_frame_ptr = trap_frame_vaddr.data() as *mut TrapFrame;
300             *trap_frame_ptr = child_trapframe;
301         }
302 
303         let current_arch_guard = current_pcb.arch_info_irqsave();
304         new_arch_guard.fsbase = current_arch_guard.fsbase;
305         new_arch_guard.gsbase = current_arch_guard.gsbase;
306         new_arch_guard.fs = current_arch_guard.fs;
307         new_arch_guard.gs = current_arch_guard.gs;
308         new_arch_guard.fp_state = current_arch_guard.fp_state.clone();
309 
310         // 拷贝浮点寄存器的状态
311         if let Some(fp_state) = current_arch_guard.fp_state.as_ref() {
312             new_arch_guard.fp_state = Some(*fp_state);
313         }
314         drop(current_arch_guard);
315 
316         // 设置返回地址(子进程开始执行的指令地址)
317         if new_pcb.flags().contains(ProcessFlags::KTHREAD) {
318             let kthread_bootstrap_stage1_func_addr = kernel_thread_bootstrap_stage1 as usize;
319             new_arch_guard.rip = kthread_bootstrap_stage1_func_addr;
320         } else {
321             new_arch_guard.rip = ret_from_intr as usize;
322         }
323 
324         // 设置tls
325         if clone_flags.contains(CloneFlags::CLONE_SETTLS) {
326             drop(new_arch_guard);
327             Syscall::do_arch_prctl_64(new_pcb, ARCH_SET_FS, clone_args.tls, true)?;
328         }
329 
330         return Ok(());
331     }
332 
333     /// 切换进程
334     ///
335     /// ## 参数
336     ///
337     /// - `prev`:上一个进程的pcb
338     /// - `next`:下一个进程的pcb
339     pub unsafe fn switch_process(prev: Arc<ProcessControlBlock>, next: Arc<ProcessControlBlock>) {
340         assert!(CurrentIrqArch::is_irq_enabled() == false);
341 
342         // 保存浮点寄存器
343         prev.arch_info().save_fp_state();
344         // 切换浮点寄存器
345         next.arch_info().restore_fp_state();
346 
347         // 切换fsbase
348         prev.arch_info().save_fsbase();
349         next.arch_info().restore_fsbase();
350 
351         // 切换gsbase
352         prev.arch_info().save_gsbase();
353         next.arch_info().restore_gsbase();
354 
355         // 切换地址空间
356         let next_addr_space = next.basic().user_vm().as_ref().unwrap().clone();
357         compiler_fence(Ordering::SeqCst);
358 
359         next_addr_space.read().user_mapper.utable.make_current();
360         drop(next_addr_space);
361         compiler_fence(Ordering::SeqCst);
362         // 切换内核栈
363 
364         // 获取arch info的锁,并强制泄露其守卫(切换上下文后,在switch_finish_hook中会释放锁)
365         let next_arch = SpinLockGuard::leak(next.arch_info()) as *mut ArchPCBInfo;
366         let prev_arch = SpinLockGuard::leak(prev.arch_info()) as *mut ArchPCBInfo;
367 
368         (*prev_arch).rip = switch_back as usize;
369 
370         // 恢复当前的 preempt count*2
371         ProcessManager::current_pcb().preempt_enable();
372         ProcessManager::current_pcb().preempt_enable();
373 
374         // 切换tss
375         TSSManager::current_tss().set_rsp(
376             x86::Ring::Ring0,
377             next.kernel_stack().stack_max_address().data() as u64,
378         );
379         SWITCH_RESULT.as_mut().unwrap().get_mut().prev_pcb = Some(prev);
380         SWITCH_RESULT.as_mut().unwrap().get_mut().next_pcb = Some(next);
381         // kdebug!("switch tss ok");
382         compiler_fence(Ordering::SeqCst);
383         // 正式切换上下文
384         switch_to_inner(prev_arch, next_arch);
385     }
386 }
387 
/// Saves the current context, switches to the context of the next process, and then
/// jumps to the `switch_finish_hook` hook function.
///
/// `prev` (in `rdi`) receives the outgoing register values; `next` (in `rsi`) supplies
/// the values to load. Before the stack pointer is saved, `rbp` and `rax` are pushed on
/// the outgoing stack; `switch_back` pops them again. The `rip` stored in `next` is
/// pushed on the new stack so that the `ret` executed by the hook continues there.
#[naked]
unsafe extern "sysv64" fn switch_to_inner(prev: *mut ArchPCBInfo, next: *mut ArchPCBInfo) {
    asm!(
        // Quick reminder of the System V AMD64 calling convention used by this function
        // (`extern "sysv64"`):
        //
        // - the two parameters arrive in `rdi` (`prev`) and `rsi` (`next`),
        // - scratch registers such as `rax` may be modified freely,
        // - callee-preserved registers such as `rbx` must not be changed arbitrarily, which
        //   is why they are stored into `prev` here in the first place.
        concat!("
        // Save old registers, and load new ones
        mov [rdi + {off_rbx}], rbx
        mov rbx, [rsi + {off_rbx}]

        mov [rdi + {off_r12}], r12
        mov r12, [rsi + {off_r12}]

        mov [rdi + {off_r13}], r13
        mov r13, [rsi + {off_r13}]

        mov [rdi + {off_r14}], r14
        mov r14, [rsi + {off_r14}]

        mov [rdi + {off_r15}], r15
        mov r15, [rsi + {off_r15}]

        // switch segment registers (这些寄存器只能通过接下来的switch_hook的return来切换)
        mov [rdi + {off_fs}], fs
        mov [rdi + {off_gs}], gs

        // mov fs, [rsi + {off_fs}]
        // mov gs, [rsi + {off_gs}]

        push rbp
        push rax

        mov [rdi + {off_rbp}], rbp
        mov rbp, [rsi + {off_rbp}]

        mov [rdi + {off_rsp}], rsp
        mov rsp, [rsi + {off_rsp}]

        // // push RFLAGS (can only be modified via stack)
        pushfq
        // // pop RFLAGS into `self.rflags`
        pop QWORD PTR [rdi + {off_rflags}]

        // // push `next.rflags`
        push QWORD PTR [rsi + {off_rflags}]
        // // pop into RFLAGS
        popfq

        // push next rip to stack
        push QWORD PTR [rsi + {off_rip}]


        // When we return, we cannot even guarantee that the return address on the stack, points to
        // the calling function. Thus, we have to execute this Rust hook by
        // ourselves, which will unlock the contexts before the later switch.

        // Note that switch_finish_hook will be responsible for executing `ret`.
        jmp {switch_hook}
        "),

        // Field offsets inside `ArchPCBInfo`, substituted into the template above.
        off_rflags = const(offset_of!(ArchPCBInfo, rflags)),

        off_rbx = const(offset_of!(ArchPCBInfo, rbx)),
        off_r12 = const(offset_of!(ArchPCBInfo, r12)),
        off_r13 = const(offset_of!(ArchPCBInfo, r13)),
        off_r14 = const(offset_of!(ArchPCBInfo, r14)),
        off_rbp = const(offset_of!(ArchPCBInfo, rbp)),
        off_rsp = const(offset_of!(ArchPCBInfo, rsp)),
        off_r15 = const(offset_of!(ArchPCBInfo, r15)),
        off_rip = const(offset_of!(ArchPCBInfo, rip)),
        off_fs = const(offset_of!(ArchPCBInfo, fs)),
        off_gs = const(offset_of!(ArchPCBInfo, gs)),

        // Hook that is jumped to once the new stack and flags are in place.
        switch_hook = sym crate::process::switch_finish_hook,
        options(noreturn),
    );
}
470 
/// Executed after returning from `switch_to_inner`.
///
/// In other words, when a process is scheduled again, it resumes execution here:
/// `switch_process` stores this function's address as the saved `rip` of the process
/// being switched out. The two pops undo the `push rbp` / `push rax` performed by
/// `switch_to_inner` before it saved the stack pointer.
///
/// NOTE(review): this function is not `#[naked]`, so the compiler may emit its own
/// prologue/epilogue around the asm — confirm that the stack layout still lines up with
/// the pushes in `switch_to_inner`.
#[inline(never)]
unsafe extern "sysv64" fn switch_back() {
    asm!(concat!(
        "
        pop rax
        pop rbp
        "
    ))
}
483 
/// Turns the current kernel thread into a user process by executing `path` and leaving
/// kernel mode through `ret_from_intr`. Never returns.
///
/// ## Parameters
///
/// - `path`: path of the program passed to `Syscall::do_execve`.
/// - `argv`: argument strings passed to `Syscall::do_execve`.
/// - `envp`: environment strings passed to `Syscall::do_execve`.
///
/// ## Panics
///
/// Panics if the current stack pointer is not below the address reserved for the fake
/// trap frame, or if `do_execve` fails.
///
/// # Safety
///
/// Disables interrupts, rewrites the saved context of the current process and finally
/// replaces the stack pointer; the caller must not rely on anything living on the
/// current kernel stack afterwards.
pub unsafe fn arch_switch_to_user(path: String, argv: Vec<String>, envp: Vec<String>) -> ! {
    // No interrupt may occur in the code below.
    CurrentIrqArch::interrupt_disable();

    let current_pcb = ProcessManager::current_pcb();
    // The fake trap frame is placed at the very top of the kernel stack.
    let trap_frame_vaddr = VirtAddr::new(
        current_pcb.kernel_stack().stack_max_address().data() - core::mem::size_of::<TrapFrame>(),
    );
    // kdebug!("trap_frame_vaddr: {:?}", trap_frame_vaddr);
    let new_rip = VirtAddr::new(ret_from_intr as usize);

    // The trap frame is written later while this stack is still in use, so the live part
    // of the stack must lie strictly below it.
    assert!(
        (x86::current::registers::rsp() as usize) < trap_frame_vaddr.data(),
        "arch_switch_to_user(): current_rsp >= fake trap
        frame vaddr, this may cause some illegal access to memory!
        rsp: {:#x}, trap_frame_vaddr: {:#x}",
        x86::current::registers::rsp() as usize,
        trap_frame_vaddr.data()
    );

    let mut arch_guard = current_pcb.arch_info_irqsave();
    arch_guard.rsp = trap_frame_vaddr.data();

    // Switch fs/gs to the user data segment, both in the saved context and on the CPU.
    arch_guard.fs = USER_DS.bits();
    arch_guard.gs = USER_DS.bits();

    switch_fs_and_gs(
        SegmentSelector::from_bits_truncate(arch_guard.fs),
        SegmentSelector::from_bits_truncate(arch_guard.gs),
    );
    arch_guard.rip = new_rip.data();

    drop(arch_guard);

    // Remove the kthread flag and the kthread-private data.
    current_pcb.flags().remove(ProcessFlags::KTHREAD);
    current_pcb.worker_private().take();

    let mut trap_frame = TrapFrame::new();

    compiler_fence(Ordering::SeqCst);
    Syscall::do_execve(path, argv, envp, &mut trap_frame).unwrap_or_else(|e| {
        panic!(
            "arch_switch_to_user(): pid: {pid:?}, Failed to execve: , error: {e:?}",
            pid = current_pcb.pid(),
            e = e
        );
    });
    compiler_fence(Ordering::SeqCst);

    // Important! From this point on, every reference-counted variable, dynamically
    // allocated variable and lock guard above must already have been dropped, otherwise
    // memory-safety problems may occur.

    drop(current_pcb);

    compiler_fence(Ordering::SeqCst);
    ready_to_switch_to_user(trap_frame, trap_frame_vaddr.data(), new_rip.data());
}
541 
/// Writes `trap_frame` to `trapframe_vaddr`, points `rsp` at it, pushes `new_rip` and
/// executes `ret` so that execution continues at `new_rip`.
///
/// Must not be inlined, because it relies on `ret` to switch to user mode.
///
/// ## Parameters
///
/// - `trap_frame`: trap frame to place at `trapframe_vaddr`.
/// - `trapframe_vaddr`: address the trap frame is written to; becomes the new `rsp`.
/// - `new_rip`: address that `ret` jumps to.
///
/// # Safety
///
/// `trapframe_vaddr` must be valid for writing a `TrapFrame`, and `new_rip` must be the
/// address of code that can run with `rsp` pointing at that trap frame.
#[inline(never)]
unsafe extern "sysv64" fn ready_to_switch_to_user(
    trap_frame: TrapFrame,
    trapframe_vaddr: usize,
    new_rip: usize,
) -> ! {
    // Place the trap frame at its final location before switching the stack pointer.
    *(trapframe_vaddr as *mut TrapFrame) = trap_frame;
    asm!(
        "mov rsp, {trapframe_vaddr}",
        "push {new_rip}",
        "ret",
        trapframe_vaddr = in(reg) trapframe_vaddr,
        new_rip = in(reg) new_rip
    );
    // The `ret` above transfers control to `new_rip`; execution never gets here.
    unreachable!()
}
559