xref: /DragonOS/kernel/src/arch/x86_64/process/mod.rs (revision 1496ba7b24a5e6954291ca9643b9f3cec567479a)
1 use core::{
2     arch::asm,
3     intrinsics::unlikely,
4     mem::ManuallyDrop,
5     sync::atomic::{compiler_fence, Ordering},
6 };
7 
8 use alloc::{string::String, sync::Arc, vec::Vec};
9 
10 use memoffset::offset_of;
11 use x86::{controlregs::Cr4, segmentation::SegmentSelector};
12 
13 use crate::{
14     arch::process::table::TSSManager,
15     exception::InterruptArch,
16     libs::spinlock::SpinLockGuard,
17     mm::{
18         percpu::{PerCpu, PerCpuVar},
19         VirtAddr,
20     },
21     process::{
22         fork::CloneFlags, KernelStack, ProcessControlBlock, ProcessFlags, ProcessManager,
23         SwitchResult, SWITCH_RESULT,
24     },
25     syscall::{Syscall, SystemError},
26 };
27 
28 use self::{
29     kthread::kernel_thread_bootstrap_stage1,
30     table::{switch_fs_and_gs, KERNEL_DS, USER_DS},
31 };
32 
33 use super::{fpu::FpState, interrupt::TrapFrame, CurrentIrqArch};
34 
35 mod c_adapter;
36 pub mod kthread;
37 pub mod syscall;
38 pub mod table;
39 
extern "C" {
    /// Return-from-interrupt entry point, defined outside this file.
    ///
    /// Only its address is used here: it becomes the resume `rip` of a forked
    /// non-kthread child in `copy_thread`, and the address that
    /// `arch_switch_to_user` finally `ret`s to.
    fn ret_from_intr();
}
44 
/// Architecture-specific (x86_64) information stored in a PCB.
///
/// `switch_to_inner` addresses several of these fields by byte offset
/// (via `offset_of!`) from assembly, so the field names used there must keep
/// existing on this struct.
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct ArchPCBInfo {
    /// RFLAGS value saved and reloaded by `switch_to_inner`.
    rflags: usize,
    /// Saved `rbx` (stored/loaded by `switch_to_inner`).
    rbx: usize,
    /// Saved `r12` (stored/loaded by `switch_to_inner`).
    r12: usize,
    /// Saved `r13` (stored/loaded by `switch_to_inner`).
    r13: usize,
    /// Saved `r14` (stored/loaded by `switch_to_inner`).
    r14: usize,
    /// Saved `r15` (stored/loaded by `switch_to_inner`).
    r15: usize,
    /// Saved frame/base pointer; initialised to the top of the kernel stack
    /// when one is supplied to `new`.
    rbp: usize,
    /// Saved stack pointer; initialised to the top of the kernel stack when
    /// one is supplied to `new`.
    rsp: usize,
    /// Address at which execution resumes when this context is switched in
    /// (`switch_to_inner` pushes it and the switch hook returns to it).
    rip: usize,
    // NOTE(review): only zero-initialised in the code visible here; presumably
    // meant to hold the CR2 (page-fault address) register — TODO confirm.
    cr2: usize,
    /// FS base address, saved/restored only when CR4.FSGSBASE is enabled.
    fsbase: usize,
    /// GS base address, saved/restored only when CR4.FSGSBASE is enabled.
    gsbase: usize,
    /// FS segment selector (starts as `KERNEL_DS`).
    fs: u16,
    /// GS segment selector (starts as `KERNEL_DS`).
    gs: u16,

    /// State of the floating-point registers; `None` until the first save.
    fp_state: Option<FpState>,
}
67 
68 #[allow(dead_code)]
69 impl ArchPCBInfo {
70     /// 创建一个新的ArchPCBInfo
71     ///
72     /// ## 参数
73     ///
74     /// - `kstack`:内核栈的引用,如果为None,则不会设置rsp和rbp。如果为Some,则会设置rsp和rbp为内核栈的最高地址。
75     ///
76     /// ## 返回值
77     ///
78     /// 返回一个新的ArchPCBInfo
79     pub fn new(kstack: Option<&KernelStack>) -> Self {
80         let mut r = Self {
81             rflags: 0,
82             rbx: 0,
83             r12: 0,
84             r13: 0,
85             r14: 0,
86             r15: 0,
87             rbp: 0,
88             rsp: 0,
89             rip: 0,
90             cr2: 0,
91             fsbase: 0,
92             gsbase: 0,
93             fs: KERNEL_DS.bits(),
94             gs: KERNEL_DS.bits(),
95             fp_state: None,
96         };
97 
98         if kstack.is_some() {
99             let kstack = kstack.unwrap();
100             r.rsp = kstack.stack_max_address().data();
101             r.rbp = kstack.stack_max_address().data();
102         }
103 
104         return r;
105     }
106 
107     pub fn set_stack(&mut self, stack: VirtAddr) {
108         self.rsp = stack.data();
109     }
110 
111     pub fn set_stack_base(&mut self, stack_base: VirtAddr) {
112         self.rbp = stack_base.data();
113     }
114 
115     pub fn rbp(&self) -> usize {
116         self.rbp
117     }
118 
119     pub unsafe fn push_to_stack(&mut self, value: usize) {
120         self.rsp -= core::mem::size_of::<usize>();
121         *(self.rsp as *mut usize) = value;
122     }
123 
124     pub unsafe fn pop_from_stack(&mut self) -> usize {
125         let value = *(self.rsp as *const usize);
126         self.rsp += core::mem::size_of::<usize>();
127         value
128     }
129 
130     pub fn save_fp_state(&mut self) {
131         if self.fp_state.is_none() {
132             self.fp_state = Some(FpState::new());
133         }
134 
135         self.fp_state.as_mut().unwrap().save();
136     }
137 
138     pub fn restore_fp_state(&mut self) {
139         if unlikely(self.fp_state.is_none()) {
140             return;
141         }
142 
143         self.fp_state.as_mut().unwrap().restore();
144     }
145 
146     pub unsafe fn save_fsbase(&mut self) {
147         if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) {
148             self.fsbase = x86::current::segmentation::rdfsbase() as usize;
149         } else {
150             self.fsbase = 0;
151         }
152     }
153 
154     pub unsafe fn save_gsbase(&mut self) {
155         if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) {
156             self.gsbase = x86::current::segmentation::rdgsbase() as usize;
157         } else {
158             self.gsbase = 0;
159         }
160     }
161 
162     pub unsafe fn restore_fsbase(&mut self) {
163         if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) {
164             x86::current::segmentation::wrfsbase(self.fsbase as u64);
165         }
166     }
167 
168     pub unsafe fn restore_gsbase(&mut self) {
169         if x86::controlregs::cr4().contains(Cr4::CR4_ENABLE_FSGSBASE) {
170             x86::current::segmentation::wrgsbase(self.gsbase as u64);
171         }
172     }
173 
174     pub fn fsbase(&self) -> usize {
175         self.fsbase
176     }
177 
178     pub fn gsbase(&self) -> usize {
179         self.gsbase
180     }
181 }
182 
183 impl ProcessControlBlock {
184     /// 获取当前进程的pcb
185     pub fn arch_current_pcb() -> Arc<Self> {
186         // 获取栈指针
187         let ptr = VirtAddr::new(x86::current::registers::rsp() as usize);
188         let stack_base = VirtAddr::new(ptr.data() & (!(KernelStack::ALIGN - 1)));
189         // 从内核栈的最低地址处取出pcb的地址
190         let p = stack_base.data() as *const *const ProcessControlBlock;
191         if unlikely((unsafe { *p }).is_null()) {
192             panic!("current_pcb is null");
193         }
194         unsafe {
195             // 为了防止内核栈的pcb指针被释放,这里需要将其包装一下,使得Arc的drop不会被调用
196             let arc_wrapper: ManuallyDrop<Arc<ProcessControlBlock>> =
197                 ManuallyDrop::new(Arc::from_raw(*p));
198 
199             let new_arc: Arc<ProcessControlBlock> = Arc::clone(&arc_wrapper);
200             return new_arc;
201         }
202     }
203 }
204 
205 impl ProcessManager {
206     pub fn arch_init() {
207         {
208             // 初始化进程切换结果 per cpu变量
209             let mut switch_res_vec: Vec<SwitchResult> = Vec::new();
210             for _ in 0..PerCpu::MAX_CPU_NUM {
211                 switch_res_vec.push(SwitchResult::new());
212             }
213             unsafe {
214                 SWITCH_RESULT = Some(PerCpuVar::new(switch_res_vec).unwrap());
215             }
216         }
217     }
218     /// fork的过程中复制线程
219     ///
220     /// 由于这个过程与具体的架构相关,所以放在这里
221     pub fn copy_thread(
222         _clone_flags: &CloneFlags,
223         current_pcb: &Arc<ProcessControlBlock>,
224         new_pcb: &Arc<ProcessControlBlock>,
225         current_trapframe: &TrapFrame,
226     ) -> Result<(), SystemError> {
227         let mut child_trapframe = current_trapframe.clone();
228 
229         // 子进程的返回值为0
230         child_trapframe.set_return_value(0);
231 
232         // 设置子进程的栈基址(开始执行中断返回流程时的栈基址)
233         let mut new_arch_guard = new_pcb.arch_info();
234         let kernel_stack_guard = new_pcb.kernel_stack();
235 
236         // 设置子进程在内核态开始执行时的rsp、rbp
237         new_arch_guard.set_stack_base(kernel_stack_guard.stack_max_address());
238 
239         let trap_frame_vaddr: VirtAddr =
240             kernel_stack_guard.stack_max_address() - core::mem::size_of::<TrapFrame>();
241         new_arch_guard.set_stack(trap_frame_vaddr);
242 
243         // 拷贝栈帧
244         unsafe {
245             let trap_frame_ptr = trap_frame_vaddr.data() as *mut TrapFrame;
246             *trap_frame_ptr = child_trapframe;
247         }
248 
249         let current_arch_guard = current_pcb.arch_info_irqsave();
250         new_arch_guard.fsbase = current_arch_guard.fsbase;
251         new_arch_guard.gsbase = current_arch_guard.gsbase;
252         new_arch_guard.fs = current_arch_guard.fs;
253         new_arch_guard.gs = current_arch_guard.gs;
254         new_arch_guard.fp_state = current_arch_guard.fp_state.clone();
255 
256         // 拷贝浮点寄存器的状态
257         if let Some(fp_state) = current_arch_guard.fp_state.as_ref() {
258             new_arch_guard.fp_state = Some(*fp_state);
259         }
260         drop(current_arch_guard);
261 
262         // 设置返回地址(子进程开始执行的指令地址)
263 
264         if new_pcb.flags().contains(ProcessFlags::KTHREAD) {
265             let kthread_bootstrap_stage1_func_addr = kernel_thread_bootstrap_stage1 as usize;
266 
267             new_arch_guard.rip = kthread_bootstrap_stage1_func_addr;
268         } else {
269             new_arch_guard.rip = ret_from_intr as usize;
270         }
271 
272         return Ok(());
273     }
274 
275     /// 切换进程
276     ///
277     /// ## 参数
278     ///
279     /// - `prev`:上一个进程的pcb
280     /// - `next`:下一个进程的pcb
281     pub unsafe fn switch_process(prev: Arc<ProcessControlBlock>, next: Arc<ProcessControlBlock>) {
282         assert!(CurrentIrqArch::is_irq_enabled() == false);
283 
284         // 保存浮点寄存器
285         prev.arch_info().save_fp_state();
286         // 切换浮点寄存器
287         next.arch_info().restore_fp_state();
288 
289         // 切换fsbase
290         prev.arch_info().save_fsbase();
291         next.arch_info().restore_fsbase();
292 
293         // 切换gsbase
294         prev.arch_info().save_gsbase();
295         next.arch_info().restore_gsbase();
296 
297         // 切换地址空间
298         let next_addr_space = next.basic().user_vm().as_ref().unwrap().clone();
299         compiler_fence(Ordering::SeqCst);
300 
301         next_addr_space.read().user_mapper.utable.make_current();
302         compiler_fence(Ordering::SeqCst);
303         // 切换内核栈
304 
305         // 获取arch info的锁,并强制泄露其守卫(切换上下文后,在switch_finish_hook中会释放锁)
306         let next_arch = SpinLockGuard::leak(next.arch_info());
307         let prev_arch = SpinLockGuard::leak(prev.arch_info());
308 
309         prev_arch.rip = switch_back as usize;
310 
311         // 恢复当前的 preempt count*2
312         ProcessManager::current_pcb().preempt_enable();
313         ProcessManager::current_pcb().preempt_enable();
314         SWITCH_RESULT.as_mut().unwrap().get_mut().prev_pcb = Some(prev.clone());
315         SWITCH_RESULT.as_mut().unwrap().get_mut().next_pcb = Some(next.clone());
316 
317         // 切换tss
318         TSSManager::current_tss().set_rsp(
319             x86::Ring::Ring0,
320             next.kernel_stack().stack_max_address().data() as u64,
321         );
322         // kdebug!("switch tss ok");
323 
324         // 正式切换上下文
325         switch_to_inner(prev_arch, next_arch);
326     }
327 }
328 
/// Saves the current context, switches to the next one, then jumps to the
/// `switch_finish_hook` hook function.
///
/// In order, the assembly below:
///
/// 1. stores `rbx`, `r12`–`r15` into `prev` and loads them from `next`;
/// 2. stores the `fs`/`gs` selectors into `prev` (they are not loaded here);
/// 3. pushes `rbp` and `rax` onto the outgoing stack;
/// 4. stores `rbp`/`rsp` into `prev` and loads them from `next`, which switches
///    stacks;
/// 5. stores RFLAGS into `prev` and loads RFLAGS from `next` (both via the new
///    stack);
/// 6. pushes `next.rip` and jumps to `crate::process::switch_finish_hook`.
///
/// `prev` is addressed through `rdi` and `next` through `rsi`; fields are
/// reached by the `offset_of!` constants passed as asm operands.
#[naked]
unsafe extern "sysv64" fn switch_to_inner(prev: &mut ArchPCBInfo, next: &mut ArchPCBInfo) {
    asm!(
        // As a quick reminder for those who are unfamiliar with the System V ABI (`extern "sysv64"`):
        //
        // - the current parameters are passed in the registers `rdi`, `rsi`,
        // - we can modify scratch registers, e.g. rax
        // - we cannot change callee-preserved registers arbitrarily, e.g. rbx, which is why we
        //   store them here in the first place.
        concat!("
        // Save old registers, and load new ones
        mov [rdi + {off_rbx}], rbx
        mov rbx, [rsi + {off_rbx}]

        mov [rdi + {off_r12}], r12
        mov r12, [rsi + {off_r12}]

        mov [rdi + {off_r13}], r13
        mov r13, [rsi + {off_r13}]

        mov [rdi + {off_r14}], r14
        mov r14, [rsi + {off_r14}]

        mov [rdi + {off_r15}], r15
        mov r15, [rsi + {off_r15}]

        // switch segment registers (这些寄存器只能通过接下来的switch_hook的return来切换)
        mov [rdi + {off_fs}], fs
        mov [rdi + {off_gs}], gs

        push rbp
        push rax

        mov [rdi + {off_rbp}], rbp
        mov rbp, [rsi + {off_rbp}]

        mov [rdi + {off_rsp}], rsp
        mov rsp, [rsi + {off_rsp}]

        // // push RFLAGS (can only be modified via stack)
        pushfq
        // // pop RFLAGS into `self.rflags`
        pop QWORD PTR [rdi + {off_rflags}]

        // // push `next.rflags`
        push QWORD PTR [rsi + {off_rflags}]
        // // pop into RFLAGS
        popfq

        // push next rip to stack
        push QWORD PTR [rsi + {off_rip}]


        // When we return, we cannot even guarantee that the return address on the stack, points to
        // the calling function. Thus, we have to execute this Rust hook by
        // ourselves, which will unlock the contexts before the later switch.

        // Note that switch_finish_hook will be responsible for executing `ret`.
        jmp {switch_hook}
        "),

        // Byte offsets of the ArchPCBInfo fields referenced by the assembly above.
        off_rflags = const(offset_of!(ArchPCBInfo, rflags)),

        off_rbx = const(offset_of!(ArchPCBInfo, rbx)),
        off_r12 = const(offset_of!(ArchPCBInfo, r12)),
        off_r13 = const(offset_of!(ArchPCBInfo, r13)),
        off_r14 = const(offset_of!(ArchPCBInfo, r14)),
        off_rbp = const(offset_of!(ArchPCBInfo, rbp)),
        off_rsp = const(offset_of!(ArchPCBInfo, rsp)),
        off_r15 = const(offset_of!(ArchPCBInfo, r15)),
        off_rip = const(offset_of!(ArchPCBInfo, rip)),
        off_fs = const(offset_of!(ArchPCBInfo, fs)),
        off_gs = const(offset_of!(ArchPCBInfo, gs)),

        // Hook that the assembly jumps to once the new context is loaded.
        switch_hook = sym crate::process::switch_finish_hook,
        options(noreturn),
    );
}
408 
/// Executed after "returning" from `switch_to_inner`.
///
/// In other words, when a process is scheduled again it resumes execution
/// here: `switch_process` stores this function's address as the outgoing
/// process's `rip`.
///
/// The two pops mirror, in reverse order, the `push rbp` / `push rax` that
/// `switch_to_inner` performs before saving the outgoing stack pointer.
#[inline(never)]
unsafe extern "sysv64" fn switch_back() {
    asm!(concat!(
        "
        pop rax
        pop rbp
        "
    ))
}
421 
422 pub unsafe fn arch_switch_to_user(path: String, argv: Vec<String>, envp: Vec<String>) -> ! {
423     // 以下代码不能发生中断
424     CurrentIrqArch::interrupt_disable();
425 
426     let current_pcb = ProcessManager::current_pcb();
427     let trap_frame_vaddr = VirtAddr::new(
428         current_pcb.kernel_stack().stack_max_address().data() - core::mem::size_of::<TrapFrame>(),
429     );
430     // kdebug!("trap_frame_vaddr: {:?}", trap_frame_vaddr);
431     let new_rip = VirtAddr::new(ret_from_intr as usize);
432 
433     assert!(
434         (x86::current::registers::rsp() as usize) < trap_frame_vaddr.data(),
435         "arch_switch_to_user(): current_rsp >= fake trap
436         frame vaddr, this may cause some illegal access to memory!
437         rsp: {:#x}, trap_frame_vaddr: {:#x}",
438         x86::current::registers::rsp() as usize,
439         trap_frame_vaddr.data()
440     );
441 
442     let mut arch_guard = current_pcb.arch_info_irqsave();
443     arch_guard.rsp = trap_frame_vaddr.data();
444 
445     arch_guard.fs = USER_DS.bits();
446     arch_guard.gs = USER_DS.bits();
447 
448     switch_fs_and_gs(
449         SegmentSelector::from_bits_truncate(arch_guard.fs),
450         SegmentSelector::from_bits_truncate(arch_guard.gs),
451     );
452     arch_guard.rip = new_rip.data();
453 
454     drop(arch_guard);
455 
456     // 删除kthread的标志
457     current_pcb.flags().remove(ProcessFlags::KTHREAD);
458     current_pcb.worker_private().take();
459 
460     let mut trap_frame = TrapFrame::new();
461 
462     compiler_fence(Ordering::SeqCst);
463     Syscall::do_execve(path, argv, envp, &mut trap_frame).unwrap_or_else(|e| {
464         panic!(
465             "arch_switch_to_user(): pid: {pid:?}, Failed to execve: , error: {e:?}",
466             pid = current_pcb.pid(),
467             e = e
468         );
469     });
470     compiler_fence(Ordering::SeqCst);
471 
472     // 重要!在这里之后,一定要保证上面的引用计数变量、动态申请的变量、锁的守卫都被drop了,否则可能导致内存安全问题!
473 
474     drop(current_pcb);
475 
476     compiler_fence(Ordering::SeqCst);
477     ready_to_switch_to_user(trap_frame, trap_frame_vaddr.data(), new_rip.data());
478 }
479 
/// Final step of switching to user mode.
///
/// Must not be inlined, because it relies on `ret` to perform the switch.
///
/// ## Parameters
///
/// - `trap_frame`: trap frame to install
/// - `trapframe_vaddr`: address the trap frame is written to; also becomes the
///   new stack pointer
/// - `new_rip`: address that is pushed and then returned to
///
/// ## Safety
///
/// `trapframe_vaddr` must be valid, writable memory large enough for a
/// `TrapFrame`, and `new_rip` must be a valid address to continue execution at.
#[inline(never)]
unsafe extern "sysv64" fn ready_to_switch_to_user(
    trap_frame: TrapFrame,
    trapframe_vaddr: usize,
    new_rip: usize,
) -> ! {
    // Install the trap frame at its final location.
    *(trapframe_vaddr as *mut TrapFrame) = trap_frame;
    // Point rsp at the trap frame, push the target address and `ret` to it, so
    // the target starts running with rsp == trapframe_vaddr.
    asm!(
        "mov rsp, {trapframe_vaddr}",
        "push {new_rip}",
        "ret",
        trapframe_vaddr = in(reg) trapframe_vaddr,
        new_rip = in(reg) new_rip
    );
    unreachable!()
}
497