xref: /DragonOS/kernel/src/arch/x86_64/process/mod.rs (revision 40314b30ab2a7e1fd06a05a00f693e644e446035)
1 use core::{
2     arch::asm,
3     intrinsics::unlikely,
4     mem::ManuallyDrop,
5     sync::atomic::{compiler_fence, Ordering},
6 };
7 
8 use alloc::{string::String, sync::Arc, vec::Vec};
9 
10 use memoffset::offset_of;
11 use x86::{controlregs::Cr4, segmentation::SegmentSelector};
12 
13 use crate::{
14     arch::process::table::TSSManager,
15     exception::InterruptArch,
16     kwarn,
17     libs::spinlock::SpinLockGuard,
18     mm::{
19         percpu::{PerCpu, PerCpuVar},
20         VirtAddr,
21     },
22     process::{
23         fork::CloneFlags, KernelStack, ProcessControlBlock, ProcessFlags, ProcessManager,
24         SwitchResult, SWITCH_RESULT,
25     },
26     syscall::{Syscall, SystemError},
27 };
28 
29 use self::{
30     kthread::kernel_thread_bootstrap_stage1,
31     table::{switch_fs_and_gs, KERNEL_DS, USER_DS},
32 };
33 
34 use super::{fpu::FpState, interrupt::TrapFrame, CurrentIrqArch};
35 
36 mod c_adapter;
37 pub mod kthread;
38 pub mod syscall;
39 pub mod table;
40 
41 extern "C" {
42     /// Return-from-interrupt routine, provided as an external `extern "C"` symbol.
    ///
    /// This file never calls it directly; it only takes its address and stores it as the
    /// `rip` a task resumes at (see `copy_thread` and `arch_switch_to_user`).
43     fn ret_from_intr();
44 }
45 
/// Storage block of 32768 bytes, aligned to 32768 bytes, whose only member is the idle
/// process's kernel stack.
46 #[allow(dead_code)]
47 #[repr(align(32768))]
48 union InitProcUnion {
49     /// Holds the kernel stack of the idle process.
50     idle_stack: [u8; 32768],
51 }
52 
/// Zero-initialised idle-stack storage, emitted into the `.data.init_proc_union` link section
/// under its unmangled name.
///
/// NOTE(review): the name suggests this is the bootstrap processor's idle stack, presumably
/// referenced from linker scripts or assembly by symbol name — confirm.
53 #[link_section = ".data.init_proc_union"]
54 #[no_mangle]
55 static BSP_IDLE_STACK_SPACE: InitProcUnion = InitProcUnion {
56     idle_stack: [0; 32768],
57 };
58 
59 /// Architecture-specific information stored in a process control block (PCB).
///
/// `switch_to_inner` stores the outgoing task's `rbx`, `r12`-`r15`, `rbp`, `rsp`, `rflags`,
/// `fs` and `gs` into this structure, and reloads `rbx`, `r12`-`r15`, `rbp`, `rsp` and
/// `rflags` of the incoming task from it. `rip` is the address that is pushed for the
/// incoming task to resume at. `fsbase`/`gsbase` are filled by `save_fsbase`/`save_gsbase`.
60 #[derive(Debug, Clone)]
61 #[allow(dead_code)]
62 pub struct ArchPCBInfo {
63     rflags: usize,
64     rbx: usize,
65     r12: usize,
66     r13: usize,
67     r14: usize,
68     r15: usize,
69     rbp: usize,
70     rsp: usize,
71     rip: usize,
72     cr2: usize,
73     fsbase: usize,
74     gsbase: usize,
75     fs: u16,
76     gs: u16,
77 
78     /// State of the floating-point registers; `None` until it is first saved or copied in.
79     fp_state: Option<FpState>,
80 }
81 
82 #[allow(dead_code)]
83 impl ArchPCBInfo {
84     /// 创建一个新的ArchPCBInfo
85     ///
86     /// ## 参数
87     ///
88     /// - `kstack`:内核栈的引用,如果为None,则不会设置rsp和rbp。如果为Some,则会设置rsp和rbp为内核栈的最高地址。
89     ///
90     /// ## 返回值
91     ///
92     /// 返回一个新的ArchPCBInfo
93     pub fn new(kstack: Option<&KernelStack>) -> Self {
94         let mut r = Self {
95             rflags: 0,
96             rbx: 0,
97             r12: 0,
98             r13: 0,
99             r14: 0,
100             r15: 0,
101             rbp: 0,
102             rsp: 0,
103             rip: 0,
104             cr2: 0,
105             fsbase: 0,
106             gsbase: 0,
107             fs: KERNEL_DS.bits(),
108             gs: KERNEL_DS.bits(),
109             fp_state: None,
110         };
111 
112         if kstack.is_some() {
113             let kstack = kstack.unwrap();
114             r.rsp = kstack.stack_max_address().data();
115             r.rbp = kstack.stack_max_address().data();
116         }
117 
118         return r;
119     }
120 
121     pub fn set_stack(&mut self, stack: VirtAddr) {
122         self.rsp = stack.data();
123     }
124 
125     pub fn set_stack_base(&mut self, stack_base: VirtAddr) {
126         self.rbp = stack_base.data();
127     }
128 
129     pub fn rbp(&self) -> usize {
130         self.rbp
131     }
132 
133     pub unsafe fn push_to_stack(&mut self, value: usize) {
134         self.rsp -= core::mem::size_of::<usize>();
135         *(self.rsp as *mut usize) = value;
136     }
137 
138     pub unsafe fn pop_from_stack(&mut self) -> usize {
139         let value = *(self.rsp as *const usize);
140         self.rsp += core::mem::size_of::<usize>();
141         value
142     }
143 
144     pub fn save_fp_state(&mut self) {
145         if self.fp_state.is_none() {
146             self.fp_state = Some(FpState::new());
147         }
148 
149         self.fp_state.as_mut().unwrap().save();
150     }
151 
152     pub fn restore_fp_state(&mut self) {
153         if unlikely(self.fp_state.is_none()) {
154             return;
155         }
156 
157         self.fp_state.as_mut().unwrap().restore();
158     }
159 
160     /// 返回浮点寄存器结构体的副本
161     pub fn fp_state(&self) -> &Option<FpState> {
162         &self.fp_state
163     }
164 
165     // 清空浮点寄存器
166     pub fn clear_fp_state(&mut self) {
167         if unlikely(self.fp_state.is_none()) {
168             kwarn!("fp_state is none");
169             return;
170         }
171 
172         self.fp_state.as_mut().unwrap().clear();
173     }
174     pub unsafe fn save_fsbase(&mut self) {
175         if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) {
176             self.fsbase = x86::current::segmentation::rdfsbase() as usize;
177         } else {
178             self.fsbase = 0;
179         }
180     }
181 
182     pub unsafe fn save_gsbase(&mut self) {
183         if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) {
184             self.gsbase = x86::current::segmentation::rdgsbase() as usize;
185         } else {
186             self.gsbase = 0;
187         }
188     }
189 
190     pub unsafe fn restore_fsbase(&mut self) {
191         if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) {
192             x86::current::segmentation::wrfsbase(self.fsbase as u64);
193         }
194     }
195 
196     pub unsafe fn restore_gsbase(&mut self) {
197         if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) {
198             x86::current::segmentation::wrgsbase(self.gsbase as u64);
199         }
200     }
201 
202     pub fn fsbase(&self) -> usize {
203         self.fsbase
204     }
205 
206     pub fn gsbase(&self) -> usize {
207         self.gsbase
208     }
209 
210     pub fn cr2_mut(&mut self) -> &mut usize {
211         &mut self.cr2
212     }
213 
214     pub fn fp_state_mut(&mut self) -> &mut Option<FpState> {
215         &mut self.fp_state
216     }
217 }
218 
219 impl ProcessControlBlock {
220     /// 获取当前进程的pcb
221     pub fn arch_current_pcb() -> Arc<Self> {
222         // 获取栈指针
223         let ptr = VirtAddr::new(x86::current::registers::rsp() as usize);
224         let stack_base = VirtAddr::new(ptr.data() & (!(KernelStack::ALIGN - 1)));
225         // 从内核栈的最低地址处取出pcb的地址
226         let p = stack_base.data() as *const *const ProcessControlBlock;
227         if unlikely((unsafe { *p }).is_null()) {
228             panic!("current_pcb is null");
229         }
230         unsafe {
231             // 为了防止内核栈的pcb指针被释放,这里需要将其包装一下,使得Arc的drop不会被调用
232             let arc_wrapper: ManuallyDrop<Arc<ProcessControlBlock>> =
233                 ManuallyDrop::new(Arc::from_raw(*p));
234 
235             let new_arc: Arc<ProcessControlBlock> = Arc::clone(&arc_wrapper);
236             return new_arc;
237         }
238     }
239 }
240 
241 impl ProcessManager {
242     pub fn arch_init() {
243         {
244             // 初始化进程切换结果 per cpu变量
245             let mut switch_res_vec: Vec<SwitchResult> = Vec::new();
246             for _ in 0..PerCpu::MAX_CPU_NUM {
247                 switch_res_vec.push(SwitchResult::new());
248             }
249             unsafe {
250                 SWITCH_RESULT = Some(PerCpuVar::new(switch_res_vec).unwrap());
251             }
252         }
253     }
254     /// fork的过程中复制线程
255     ///
256     /// 由于这个过程与具体的架构相关,所以放在这里
257     pub fn copy_thread(
258         _clone_flags: &CloneFlags,
259         current_pcb: &Arc<ProcessControlBlock>,
260         new_pcb: &Arc<ProcessControlBlock>,
261         current_trapframe: &TrapFrame,
262     ) -> Result<(), SystemError> {
263         let mut child_trapframe = current_trapframe.clone();
264 
265         // 子进程的返回值为0
266         child_trapframe.set_return_value(0);
267 
268         // 设置子进程的栈基址(开始执行中断返回流程时的栈基址)
269         let mut new_arch_guard = new_pcb.arch_info();
270         let kernel_stack_guard = new_pcb.kernel_stack();
271 
272         // 设置子进程在内核态开始执行时的rsp、rbp
273         new_arch_guard.set_stack_base(kernel_stack_guard.stack_max_address());
274 
275         let trap_frame_vaddr: VirtAddr =
276             kernel_stack_guard.stack_max_address() - core::mem::size_of::<TrapFrame>();
277         new_arch_guard.set_stack(trap_frame_vaddr);
278 
279         // 拷贝栈帧
280         unsafe {
281             let trap_frame_ptr = trap_frame_vaddr.data() as *mut TrapFrame;
282             *trap_frame_ptr = child_trapframe;
283         }
284 
285         let current_arch_guard = current_pcb.arch_info_irqsave();
286         new_arch_guard.fsbase = current_arch_guard.fsbase;
287         new_arch_guard.gsbase = current_arch_guard.gsbase;
288         new_arch_guard.fs = current_arch_guard.fs;
289         new_arch_guard.gs = current_arch_guard.gs;
290         new_arch_guard.fp_state = current_arch_guard.fp_state.clone();
291 
292         // 拷贝浮点寄存器的状态
293         if let Some(fp_state) = current_arch_guard.fp_state.as_ref() {
294             new_arch_guard.fp_state = Some(*fp_state);
295         }
296         drop(current_arch_guard);
297 
298         // 设置返回地址(子进程开始执行的指令地址)
299 
300         if new_pcb.flags().contains(ProcessFlags::KTHREAD) {
301             let kthread_bootstrap_stage1_func_addr = kernel_thread_bootstrap_stage1 as usize;
302 
303             new_arch_guard.rip = kthread_bootstrap_stage1_func_addr;
304         } else {
305             new_arch_guard.rip = ret_from_intr as usize;
306         }
307 
308         return Ok(());
309     }
310 
311     /// 切换进程
312     ///
313     /// ## 参数
314     ///
315     /// - `prev`:上一个进程的pcb
316     /// - `next`:下一个进程的pcb
317     pub unsafe fn switch_process(prev: Arc<ProcessControlBlock>, next: Arc<ProcessControlBlock>) {
318         assert!(CurrentIrqArch::is_irq_enabled() == false);
319 
320         // 保存浮点寄存器
321         prev.arch_info().save_fp_state();
322         // 切换浮点寄存器
323         next.arch_info().restore_fp_state();
324 
325         // 切换fsbase
326         prev.arch_info().save_fsbase();
327         next.arch_info().restore_fsbase();
328 
329         // 切换gsbase
330         prev.arch_info().save_gsbase();
331         next.arch_info().restore_gsbase();
332 
333         // 切换地址空间
334         let next_addr_space = next.basic().user_vm().as_ref().unwrap().clone();
335         compiler_fence(Ordering::SeqCst);
336 
337         next_addr_space.read().user_mapper.utable.make_current();
338         compiler_fence(Ordering::SeqCst);
339         // 切换内核栈
340 
341         // 获取arch info的锁,并强制泄露其守卫(切换上下文后,在switch_finish_hook中会释放锁)
342         let next_arch = SpinLockGuard::leak(next.arch_info());
343         let prev_arch = SpinLockGuard::leak(prev.arch_info());
344 
345         prev_arch.rip = switch_back as usize;
346 
347         // 恢复当前的 preempt count*2
348         ProcessManager::current_pcb().preempt_enable();
349         ProcessManager::current_pcb().preempt_enable();
350         SWITCH_RESULT.as_mut().unwrap().get_mut().prev_pcb = Some(prev.clone());
351         SWITCH_RESULT.as_mut().unwrap().get_mut().next_pcb = Some(next.clone());
352 
353         // 切换tss
354         TSSManager::current_tss().set_rsp(
355             x86::Ring::Ring0,
356             next.kernel_stack().stack_max_address().data() as u64,
357         );
358         // kdebug!("switch tss ok");
359 
360         compiler_fence(Ordering::SeqCst);
361         // 正式切换上下文
362         switch_to_inner(prev_arch, next_arch);
363     }
364 }
365 
366 /// 保存上下文,然后切换进程,接着jmp到`switch_finish_hook`钩子函数
367 #[naked]
368 unsafe extern "sysv64" fn switch_to_inner(prev: &mut ArchPCBInfo, next: &mut ArchPCBInfo) {
369     asm!(
370         // As a quick reminder for those who are unfamiliar with the System V ABI (extern "C"):
371         //
372         // - the current parameters are passed in the registers `rdi`, `rsi`,
373         // - we can modify scratch registers, e.g. rax
374         // - we cannot change callee-preserved registers arbitrarily, e.g. rbx, which is why we
375         //   store them here in the first place.
376         concat!("
377         // Save old registers, and load new ones
378         mov [rdi + {off_rbx}], rbx
379         mov rbx, [rsi + {off_rbx}]
380 
381         mov [rdi + {off_r12}], r12
382         mov r12, [rsi + {off_r12}]
383 
384         mov [rdi + {off_r13}], r13
385         mov r13, [rsi + {off_r13}]
386 
387         mov [rdi + {off_r14}], r14
388         mov r14, [rsi + {off_r14}]
389 
390         mov [rdi + {off_r15}], r15
391         mov r15, [rsi + {off_r15}]
392 
393         // switch segment registers (这些寄存器只能通过接下来的switch_hook的return来切换)
394         mov [rdi + {off_fs}], fs
395         mov [rdi + {off_gs}], gs
396 
397         push rbp
398         push rax
399 
400         mov [rdi + {off_rbp}], rbp
401         mov rbp, [rsi + {off_rbp}]
402 
403         mov [rdi + {off_rsp}], rsp
404         mov rsp, [rsi + {off_rsp}]
405 
406         // // push RFLAGS (can only be modified via stack)
407         pushfq
408         // // pop RFLAGS into `self.rflags`
409         pop QWORD PTR [rdi + {off_rflags}]
410 
411         // // push `next.rflags`
412         push QWORD PTR [rsi + {off_rflags}]
413         // // pop into RFLAGS
414         popfq
415 
416         // push next rip to stack
417         push QWORD PTR [rsi + {off_rip}]
418 
419 
420         // When we return, we cannot even guarantee that the return address on the stack, points to
421         // the calling function. Thus, we have to execute this Rust hook by
422         // ourselves, which will unlock the contexts before the later switch.
423 
424         // Note that switch_finish_hook will be responsible for executing `ret`.
425         jmp {switch_hook}
426         "),
427 
428         off_rflags = const(offset_of!(ArchPCBInfo, rflags)),
429 
430         off_rbx = const(offset_of!(ArchPCBInfo, rbx)),
431         off_r12 = const(offset_of!(ArchPCBInfo, r12)),
432         off_r13 = const(offset_of!(ArchPCBInfo, r13)),
433         off_r14 = const(offset_of!(ArchPCBInfo, r14)),
434         off_rbp = const(offset_of!(ArchPCBInfo, rbp)),
435         off_rsp = const(offset_of!(ArchPCBInfo, rsp)),
436         off_r15 = const(offset_of!(ArchPCBInfo, r15)),
437         off_rip = const(offset_of!(ArchPCBInfo, rip)),
438         off_fs = const(offset_of!(ArchPCBInfo, fs)),
439         off_gs = const(offset_of!(ArchPCBInfo, gs)),
440 
441         switch_hook = sym crate::process::switch_finish_hook,
442         options(noreturn),
443     );
444 }
445 
/// Runs after "returning" from `switch_to_inner`.
///
/// In other words, a process that is scheduled again starts executing here:
/// `switch_process` stores this function's address as the saved `rip` of the process being
/// switched out. The two pops mirror the `push rbp` / `push rax` done in `switch_to_inner`
/// before the stack pointer was saved.
#[inline(never)]
unsafe extern "sysv64" fn switch_back() {
    asm!("pop rax", "pop rbp")
}
458 
459 pub unsafe fn arch_switch_to_user(path: String, argv: Vec<String>, envp: Vec<String>) -> ! {
460     // 以下代码不能发生中断
461     CurrentIrqArch::interrupt_disable();
462 
463     let current_pcb = ProcessManager::current_pcb();
464     let trap_frame_vaddr = VirtAddr::new(
465         current_pcb.kernel_stack().stack_max_address().data() - core::mem::size_of::<TrapFrame>(),
466     );
467     // kdebug!("trap_frame_vaddr: {:?}", trap_frame_vaddr);
468     let new_rip = VirtAddr::new(ret_from_intr as usize);
469 
470     assert!(
471         (x86::current::registers::rsp() as usize) < trap_frame_vaddr.data(),
472         "arch_switch_to_user(): current_rsp >= fake trap
473         frame vaddr, this may cause some illegal access to memory!
474         rsp: {:#x}, trap_frame_vaddr: {:#x}",
475         x86::current::registers::rsp() as usize,
476         trap_frame_vaddr.data()
477     );
478 
479     let mut arch_guard = current_pcb.arch_info_irqsave();
480     arch_guard.rsp = trap_frame_vaddr.data();
481 
482     arch_guard.fs = USER_DS.bits();
483     arch_guard.gs = USER_DS.bits();
484 
485     switch_fs_and_gs(
486         SegmentSelector::from_bits_truncate(arch_guard.fs),
487         SegmentSelector::from_bits_truncate(arch_guard.gs),
488     );
489     arch_guard.rip = new_rip.data();
490 
491     drop(arch_guard);
492 
493     // 删除kthread的标志
494     current_pcb.flags().remove(ProcessFlags::KTHREAD);
495     current_pcb.worker_private().take();
496 
497     let mut trap_frame = TrapFrame::new();
498 
499     compiler_fence(Ordering::SeqCst);
500     Syscall::do_execve(path, argv, envp, &mut trap_frame).unwrap_or_else(|e| {
501         panic!(
502             "arch_switch_to_user(): pid: {pid:?}, Failed to execve: , error: {e:?}",
503             pid = current_pcb.pid(),
504             e = e
505         );
506     });
507     compiler_fence(Ordering::SeqCst);
508 
509     // 重要!在这里之后,一定要保证上面的引用计数变量、动态申请的变量、锁的守卫都被drop了,否则可能导致内存安全问题!
510 
511     drop(current_pcb);
512 
513     compiler_fence(Ordering::SeqCst);
514     ready_to_switch_to_user(trap_frame, trap_frame_vaddr.data(), new_rip.data());
515 }
516 
517 /// 由于需要依赖ret来切换到用户态,所以不能inline
518 #[inline(never)]
519 unsafe extern "sysv64" fn ready_to_switch_to_user(
520     trap_frame: TrapFrame,
521     trapframe_vaddr: usize,
522     new_rip: usize,
523 ) -> ! {
524     *(trapframe_vaddr as *mut TrapFrame) = trap_frame;
525     asm!(
526         "mov rsp, {trapframe_vaddr}",
527         "push {new_rip}",
528         "ret",
529         trapframe_vaddr = in(reg) trapframe_vaddr,
530         new_rip = in(reg) new_rip
531     );
532     unreachable!()
533 }
534