xref: /DragonOS/kernel/src/arch/x86_64/process/mod.rs (revision 0dd8ff43325b494ea777dbe6e552fdc77b9dabc8)
1 use core::{
2     arch::asm,
3     intrinsics::unlikely,
4     mem::ManuallyDrop,
5     sync::atomic::{compiler_fence, Ordering},
6 };
7 
8 use alloc::{string::String, sync::Arc, vec::Vec};
9 
10 use memoffset::offset_of;
11 use x86::{controlregs::Cr4, segmentation::SegmentSelector};
12 
13 use crate::{
14     arch::process::table::TSSManager,
15     exception::InterruptArch,
16     libs::spinlock::SpinLockGuard,
17     mm::{
18         percpu::{PerCpu, PerCpuVar},
19         VirtAddr,
20     },
21     process::{
22         fork::CloneFlags, KernelStack, ProcessControlBlock, ProcessFlags, ProcessManager,
23         SwitchResult, SWITCH_RESULT,
24     },
25     syscall::{Syscall, SystemError},
26 };
27 
28 use self::{
29     kthread::kernel_thread_bootstrap_stage1,
30     table::{switch_fs_and_gs, KERNEL_DS, USER_DS},
31 };
32 
33 use super::{fpu::FpState, interrupt::TrapFrame, CurrentIrqArch};
34 
35 mod c_adapter;
36 pub mod kthread;
37 pub mod syscall;
38 pub mod table;
39 
extern "C" {
    /// Return from interrupt.
    ///
    /// Declared here and defined outside this file. Within this file only its address is
    /// taken: `ProcessManager::copy_thread` stores it as the saved `rip` of a forked
    /// non-kthread child, and `arch_switch_to_user` uses it as the address to `ret` to.
    fn ret_from_intr();
}
44 
/// Storage type for an idle process kernel stack: 32768 bytes, aligned to 32768 bytes.
#[allow(dead_code)]
#[repr(align(32768))]
union InitProcUnion {
    /// Holds the kernel stack of the idle process.
    idle_stack: [u8; 32768],
}
51 
/// Zero-initialized `InitProcUnion`, emitted into the `.data.init_proc_union` link section
/// under its unmangled symbol name.
///
/// NOTE(review): judging by the name this is the idle stack of the bootstrap processor and is
/// presumably referenced by symbol from the linker script or boot code — confirm.
#[link_section = ".data.init_proc_union"]
#[no_mangle]
static BSP_IDLE_STACK_SPACE: InitProcUnion = InitProcUnion {
    idle_stack: [0; 32768],
};
57 
/// Architecture-specific information stored in the PCB.
///
/// `switch_to_inner` reads and writes `rflags`, `rbx`, `r12`..`r15`, `rbp`, `rsp`, `rip`, `fs`
/// and `gs` by field offset (`offset_of!`) from inline assembly, so renaming or removing any
/// of those fields requires updating that assembly as well.
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct ArchPCBInfo {
    rflags: usize,
    rbx: usize,
    r12: usize,
    r13: usize,
    r14: usize,
    r15: usize,
    rbp: usize,
    rsp: usize,
    // Address at which the task resumes; `switch_to_inner` pushes it onto the incoming stack.
    rip: usize,
    cr2: usize,
    // FS/GS base values as recorded by `save_fsbase` / `save_gsbase`.
    fsbase: usize,
    gsbase: usize,
    // FS/GS segment selectors: `KERNEL_DS` after `new`, `USER_DS` after `arch_switch_to_user`.
    fs: u16,
    gs: u16,

    /// State of the floating-point registers; `None` until it is first saved or copied in.
    fp_state: Option<FpState>,
}
80 
81 #[allow(dead_code)]
82 impl ArchPCBInfo {
83     /// 创建一个新的ArchPCBInfo
84     ///
85     /// ## 参数
86     ///
87     /// - `kstack`:内核栈的引用,如果为None,则不会设置rsp和rbp。如果为Some,则会设置rsp和rbp为内核栈的最高地址。
88     ///
89     /// ## 返回值
90     ///
91     /// 返回一个新的ArchPCBInfo
92     pub fn new(kstack: Option<&KernelStack>) -> Self {
93         let mut r = Self {
94             rflags: 0,
95             rbx: 0,
96             r12: 0,
97             r13: 0,
98             r14: 0,
99             r15: 0,
100             rbp: 0,
101             rsp: 0,
102             rip: 0,
103             cr2: 0,
104             fsbase: 0,
105             gsbase: 0,
106             fs: KERNEL_DS.bits(),
107             gs: KERNEL_DS.bits(),
108             fp_state: None,
109         };
110 
111         if kstack.is_some() {
112             let kstack = kstack.unwrap();
113             r.rsp = kstack.stack_max_address().data();
114             r.rbp = kstack.stack_max_address().data();
115         }
116 
117         return r;
118     }
119 
120     pub fn set_stack(&mut self, stack: VirtAddr) {
121         self.rsp = stack.data();
122     }
123 
124     pub fn set_stack_base(&mut self, stack_base: VirtAddr) {
125         self.rbp = stack_base.data();
126     }
127 
128     pub fn rbp(&self) -> usize {
129         self.rbp
130     }
131 
132     pub unsafe fn push_to_stack(&mut self, value: usize) {
133         self.rsp -= core::mem::size_of::<usize>();
134         *(self.rsp as *mut usize) = value;
135     }
136 
137     pub unsafe fn pop_from_stack(&mut self) -> usize {
138         let value = *(self.rsp as *const usize);
139         self.rsp += core::mem::size_of::<usize>();
140         value
141     }
142 
143     pub fn save_fp_state(&mut self) {
144         if self.fp_state.is_none() {
145             self.fp_state = Some(FpState::new());
146         }
147 
148         self.fp_state.as_mut().unwrap().save();
149     }
150 
151     pub fn restore_fp_state(&mut self) {
152         if unlikely(self.fp_state.is_none()) {
153             return;
154         }
155 
156         self.fp_state.as_mut().unwrap().restore();
157     }
158 
159     pub unsafe fn save_fsbase(&mut self) {
160         if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) {
161             self.fsbase = x86::current::segmentation::rdfsbase() as usize;
162         } else {
163             self.fsbase = 0;
164         }
165     }
166 
167     pub unsafe fn save_gsbase(&mut self) {
168         if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) {
169             self.gsbase = x86::current::segmentation::rdgsbase() as usize;
170         } else {
171             self.gsbase = 0;
172         }
173     }
174 
175     pub unsafe fn restore_fsbase(&mut self) {
176         if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) {
177             x86::current::segmentation::wrfsbase(self.fsbase as u64);
178         }
179     }
180 
181     pub unsafe fn restore_gsbase(&mut self) {
182         if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) {
183             x86::current::segmentation::wrgsbase(self.gsbase as u64);
184         }
185     }
186 
187     pub fn fsbase(&self) -> usize {
188         self.fsbase
189     }
190 
191     pub fn gsbase(&self) -> usize {
192         self.gsbase
193     }
194 }
195 
196 impl ProcessControlBlock {
197     /// 获取当前进程的pcb
198     pub fn arch_current_pcb() -> Arc<Self> {
199         // 获取栈指针
200         let ptr = VirtAddr::new(x86::current::registers::rsp() as usize);
201         let stack_base = VirtAddr::new(ptr.data() & (!(KernelStack::ALIGN - 1)));
202         // 从内核栈的最低地址处取出pcb的地址
203         let p = stack_base.data() as *const *const ProcessControlBlock;
204         if unlikely((unsafe { *p }).is_null()) {
205             panic!("current_pcb is null");
206         }
207         unsafe {
208             // 为了防止内核栈的pcb指针被释放,这里需要将其包装一下,使得Arc的drop不会被调用
209             let arc_wrapper: ManuallyDrop<Arc<ProcessControlBlock>> =
210                 ManuallyDrop::new(Arc::from_raw(*p));
211 
212             let new_arc: Arc<ProcessControlBlock> = Arc::clone(&arc_wrapper);
213             return new_arc;
214         }
215     }
216 }
217 
218 impl ProcessManager {
219     pub fn arch_init() {
220         {
221             // 初始化进程切换结果 per cpu变量
222             let mut switch_res_vec: Vec<SwitchResult> = Vec::new();
223             for _ in 0..PerCpu::MAX_CPU_NUM {
224                 switch_res_vec.push(SwitchResult::new());
225             }
226             unsafe {
227                 SWITCH_RESULT = Some(PerCpuVar::new(switch_res_vec).unwrap());
228             }
229         }
230     }
231     /// fork的过程中复制线程
232     ///
233     /// 由于这个过程与具体的架构相关,所以放在这里
234     pub fn copy_thread(
235         _clone_flags: &CloneFlags,
236         current_pcb: &Arc<ProcessControlBlock>,
237         new_pcb: &Arc<ProcessControlBlock>,
238         current_trapframe: &TrapFrame,
239     ) -> Result<(), SystemError> {
240         let mut child_trapframe = current_trapframe.clone();
241 
242         // 子进程的返回值为0
243         child_trapframe.set_return_value(0);
244 
245         // 设置子进程的栈基址(开始执行中断返回流程时的栈基址)
246         let mut new_arch_guard = new_pcb.arch_info();
247         let kernel_stack_guard = new_pcb.kernel_stack();
248 
249         // 设置子进程在内核态开始执行时的rsp、rbp
250         new_arch_guard.set_stack_base(kernel_stack_guard.stack_max_address());
251 
252         let trap_frame_vaddr: VirtAddr =
253             kernel_stack_guard.stack_max_address() - core::mem::size_of::<TrapFrame>();
254         new_arch_guard.set_stack(trap_frame_vaddr);
255 
256         // 拷贝栈帧
257         unsafe {
258             let trap_frame_ptr = trap_frame_vaddr.data() as *mut TrapFrame;
259             *trap_frame_ptr = child_trapframe;
260         }
261 
262         let current_arch_guard = current_pcb.arch_info_irqsave();
263         new_arch_guard.fsbase = current_arch_guard.fsbase;
264         new_arch_guard.gsbase = current_arch_guard.gsbase;
265         new_arch_guard.fs = current_arch_guard.fs;
266         new_arch_guard.gs = current_arch_guard.gs;
267         new_arch_guard.fp_state = current_arch_guard.fp_state.clone();
268 
269         // 拷贝浮点寄存器的状态
270         if let Some(fp_state) = current_arch_guard.fp_state.as_ref() {
271             new_arch_guard.fp_state = Some(*fp_state);
272         }
273         drop(current_arch_guard);
274 
275         // 设置返回地址(子进程开始执行的指令地址)
276 
277         if new_pcb.flags().contains(ProcessFlags::KTHREAD) {
278             let kthread_bootstrap_stage1_func_addr = kernel_thread_bootstrap_stage1 as usize;
279 
280             new_arch_guard.rip = kthread_bootstrap_stage1_func_addr;
281         } else {
282             new_arch_guard.rip = ret_from_intr as usize;
283         }
284 
285         return Ok(());
286     }
287 
288     /// 切换进程
289     ///
290     /// ## 参数
291     ///
292     /// - `prev`:上一个进程的pcb
293     /// - `next`:下一个进程的pcb
294     pub unsafe fn switch_process(prev: Arc<ProcessControlBlock>, next: Arc<ProcessControlBlock>) {
295         assert!(CurrentIrqArch::is_irq_enabled() == false);
296 
297         // 保存浮点寄存器
298         prev.arch_info().save_fp_state();
299         // 切换浮点寄存器
300         next.arch_info().restore_fp_state();
301 
302         // 切换fsbase
303         prev.arch_info().save_fsbase();
304         next.arch_info().restore_fsbase();
305 
306         // 切换gsbase
307         prev.arch_info().save_gsbase();
308         next.arch_info().restore_gsbase();
309 
310         // 切换地址空间
311         let next_addr_space = next.basic().user_vm().as_ref().unwrap().clone();
312         compiler_fence(Ordering::SeqCst);
313 
314         next_addr_space.read().user_mapper.utable.make_current();
315         compiler_fence(Ordering::SeqCst);
316         // 切换内核栈
317 
318         // 获取arch info的锁,并强制泄露其守卫(切换上下文后,在switch_finish_hook中会释放锁)
319         let next_arch = SpinLockGuard::leak(next.arch_info());
320         let prev_arch = SpinLockGuard::leak(prev.arch_info());
321 
322         prev_arch.rip = switch_back as usize;
323 
324         // 恢复当前的 preempt count*2
325         ProcessManager::current_pcb().preempt_enable();
326         ProcessManager::current_pcb().preempt_enable();
327         SWITCH_RESULT.as_mut().unwrap().get_mut().prev_pcb = Some(prev.clone());
328         SWITCH_RESULT.as_mut().unwrap().get_mut().next_pcb = Some(next.clone());
329 
330         // 切换tss
331         TSSManager::current_tss().set_rsp(
332             x86::Ring::Ring0,
333             next.kernel_stack().stack_max_address().data() as u64,
334         );
335         // kdebug!("switch tss ok");
336 
337         compiler_fence(Ordering::SeqCst);
338         // 正式切换上下文
339         switch_to_inner(prev_arch, next_arch);
340     }
341 }
342 
/// Saves the context, switches the process, then jumps to the `switch_finish_hook` hook
/// function.
///
/// `prev` arrives in `rdi` and `next` in `rsi`. In order, the assembly:
///
/// - stores `rbx`, `r12`..`r15` into `prev` and loads them from `next`;
/// - stores the `fs`/`gs` selectors into `prev` (they are only stored here, not loaded);
/// - pushes `rbp` and `rax` onto the outgoing stack, then saves and swaps `rbp` and `rsp`;
/// - saves `rflags` into `prev` and loads `next.rflags` (both via the incoming stack, since
///   `rsp` has already been switched at that point);
/// - pushes `next.rip` and jumps to `switch_finish_hook`, which is expected to `ret` to it.
#[naked]
unsafe extern "sysv64" fn switch_to_inner(prev: &mut ArchPCBInfo, next: &mut ArchPCBInfo) {
    asm!(
        // As a quick reminder for those who are unfamiliar with the System V ABI (extern "C"):
        //
        // - the current parameters are passed in the registers `rdi`, `rsi`,
        // - we can modify scratch registers, e.g. rax
        // - we cannot change callee-preserved registers arbitrarily, e.g. rbx, which is why we
        //   store them here in the first place.
        //
        // The assembly text below is left untouched. Its one non-English remark (next to
        // "switch segment registers") says: these registers can only be switched through the
        // return of the upcoming switch_hook.
        concat!("
        // Save old registers, and load new ones
        mov [rdi + {off_rbx}], rbx
        mov rbx, [rsi + {off_rbx}]

        mov [rdi + {off_r12}], r12
        mov r12, [rsi + {off_r12}]

        mov [rdi + {off_r13}], r13
        mov r13, [rsi + {off_r13}]

        mov [rdi + {off_r14}], r14
        mov r14, [rsi + {off_r14}]

        mov [rdi + {off_r15}], r15
        mov r15, [rsi + {off_r15}]

        // switch segment registers (这些寄存器只能通过接下来的switch_hook的return来切换)
        mov [rdi + {off_fs}], fs
        mov [rdi + {off_gs}], gs

        push rbp
        push rax

        mov [rdi + {off_rbp}], rbp
        mov rbp, [rsi + {off_rbp}]

        mov [rdi + {off_rsp}], rsp
        mov rsp, [rsi + {off_rsp}]

        // // push RFLAGS (can only be modified via stack)
        pushfq
        // // pop RFLAGS into `self.rflags`
        pop QWORD PTR [rdi + {off_rflags}]

        // // push `next.rflags`
        push QWORD PTR [rsi + {off_rflags}]
        // // pop into RFLAGS
        popfq

        // push next rip to stack
        push QWORD PTR [rsi + {off_rip}]


        // When we return, we cannot even guarantee that the return address on the stack, points to
        // the calling function. Thus, we have to execute this Rust hook by
        // ourselves, which will unlock the contexts before the later switch.

        // Note that switch_finish_hook will be responsible for executing `ret`.
        jmp {switch_hook}
        "),

        // Field offsets inside `ArchPCBInfo`, substituted into the assembly text above.
        off_rflags = const(offset_of!(ArchPCBInfo, rflags)),

        off_rbx = const(offset_of!(ArchPCBInfo, rbx)),
        off_r12 = const(offset_of!(ArchPCBInfo, r12)),
        off_r13 = const(offset_of!(ArchPCBInfo, r13)),
        off_r14 = const(offset_of!(ArchPCBInfo, r14)),
        off_rbp = const(offset_of!(ArchPCBInfo, rbp)),
        off_rsp = const(offset_of!(ArchPCBInfo, rsp)),
        off_r15 = const(offset_of!(ArchPCBInfo, r15)),
        off_rip = const(offset_of!(ArchPCBInfo, rip)),
        off_fs = const(offset_of!(ArchPCBInfo, fs)),
        off_gs = const(offset_of!(ArchPCBInfo, gs)),

        switch_hook = sym crate::process::switch_finish_hook,
        options(noreturn),
    );
}
422 
/// Runs after returning from `switch_to_inner`.
///
/// In other words, when the process is scheduled again, execution starts from here:
/// `switch_process` stores this function's address as the saved `rip` of the process that is
/// being switched out.
///
/// The two pops mirror, in reverse order, the `push rbp` / `push rax` that `switch_to_inner`
/// executes right before it saves `rsp`.
#[inline(never)]
unsafe extern "sysv64" fn switch_back() {
    asm!(concat!(
        "
        pop rax
        pop rbp
        "
    ))
}
435 
436 pub unsafe fn arch_switch_to_user(path: String, argv: Vec<String>, envp: Vec<String>) -> ! {
437     // 以下代码不能发生中断
438     CurrentIrqArch::interrupt_disable();
439 
440     let current_pcb = ProcessManager::current_pcb();
441     let trap_frame_vaddr = VirtAddr::new(
442         current_pcb.kernel_stack().stack_max_address().data() - core::mem::size_of::<TrapFrame>(),
443     );
444     // kdebug!("trap_frame_vaddr: {:?}", trap_frame_vaddr);
445     let new_rip = VirtAddr::new(ret_from_intr as usize);
446 
447     assert!(
448         (x86::current::registers::rsp() as usize) < trap_frame_vaddr.data(),
449         "arch_switch_to_user(): current_rsp >= fake trap
450         frame vaddr, this may cause some illegal access to memory!
451         rsp: {:#x}, trap_frame_vaddr: {:#x}",
452         x86::current::registers::rsp() as usize,
453         trap_frame_vaddr.data()
454     );
455 
456     let mut arch_guard = current_pcb.arch_info_irqsave();
457     arch_guard.rsp = trap_frame_vaddr.data();
458 
459     arch_guard.fs = USER_DS.bits();
460     arch_guard.gs = USER_DS.bits();
461 
462     switch_fs_and_gs(
463         SegmentSelector::from_bits_truncate(arch_guard.fs),
464         SegmentSelector::from_bits_truncate(arch_guard.gs),
465     );
466     arch_guard.rip = new_rip.data();
467 
468     drop(arch_guard);
469 
470     // 删除kthread的标志
471     current_pcb.flags().remove(ProcessFlags::KTHREAD);
472     current_pcb.worker_private().take();
473 
474     let mut trap_frame = TrapFrame::new();
475 
476     compiler_fence(Ordering::SeqCst);
477     Syscall::do_execve(path, argv, envp, &mut trap_frame).unwrap_or_else(|e| {
478         panic!(
479             "arch_switch_to_user(): pid: {pid:?}, Failed to execve: , error: {e:?}",
480             pid = current_pcb.pid(),
481             e = e
482         );
483     });
484     compiler_fence(Ordering::SeqCst);
485 
486     // 重要!在这里之后,一定要保证上面的引用计数变量、动态申请的变量、锁的守卫都被drop了,否则可能导致内存安全问题!
487 
488     drop(current_pcb);
489 
490     compiler_fence(Ordering::SeqCst);
491     ready_to_switch_to_user(trap_frame, trap_frame_vaddr.data(), new_rip.data());
492 }
493 
/// Installs the trap frame at `trapframe_vaddr` and transfers control to `new_rip`.
///
/// Must not be inlined, because it relies on `ret` to switch to user mode.
///
/// ## Parameters
///
/// - `trap_frame`: written by value to the address `trapframe_vaddr`.
/// - `trapframe_vaddr`: address that receives the trap frame and is then loaded into `rsp`.
/// - `new_rip`: pushed just below the trap frame and consumed by `ret`, so execution
///   continues at `new_rip` with `rsp` pointing at the trap frame again.
///
/// # Safety
///
/// `trapframe_vaddr` must be valid and suitably aligned for writing a `TrapFrame`, and the
/// previous stack contents are abandoned once `rsp` is overwritten.
#[inline(never)]
unsafe extern "sysv64" fn ready_to_switch_to_user(
    trap_frame: TrapFrame,
    trapframe_vaddr: usize,
    new_rip: usize,
) -> ! {
    *(trapframe_vaddr as *mut TrapFrame) = trap_frame;
    asm!(
        "mov rsp, {trapframe_vaddr}",
        "push {new_rip}",
        "ret",
        trapframe_vaddr = in(reg) trapframe_vaddr,
        new_rip = in(reg) new_rip
    );
    // Control is not expected to come back from the `ret` above; this also satisfies the
    // `-> !` return type.
    unreachable!()
}
511