xref: /DragonOS/kernel/src/process/syscall.rs (revision 0f094e50dee4cc20efbbe32852a4c79e619c4806)
1 use core::ffi::c_void;
2 
3 use alloc::{
4     ffi::CString,
5     string::{String, ToString},
6     sync::Arc,
7     vec::Vec,
8 };
9 use log::error;
10 use system_error::SystemError;
11 
12 use super::{
13     abi::WaitOption,
14     cred::{Kgid, Kuid},
15     exec::{load_binary_file, ExecParam, ExecParamFlags},
16     exit::kernel_wait4,
17     fork::{CloneFlags, KernelCloneArgs},
18     resource::{RLimit64, RLimitID, RUsage, RUsageWho},
19     KernelStack, Pid, ProcessManager,
20 };
21 use crate::{
22     arch::{interrupt::TrapFrame, CurrentIrqArch, MMArch},
23     exception::InterruptArch,
24     filesystem::{
25         procfs::procfs_register_pid,
26         vfs::{file::FileDescriptorVec, MAX_PATHLEN},
27     },
28     mm::{
29         ucontext::{AddressSpace, UserStack},
30         verify_area, MemoryManagementArch, VirtAddr,
31     },
32     process::ProcessControlBlock,
33     sched::completion::Completion,
34     syscall::{
35         user_access::{check_and_clone_cstr, check_and_clone_cstr_array, UserBufferWriter},
36         Syscall,
37     },
38 };
39 
// Reference: https://code.dragonos.org.cn/xref/linux-6.1.9/include/uapi/linux/utsname.h#17
/// System identification record copied to user space by the `uname` syscall.
///
/// The layout is `#[repr(C)]` and consists of five fixed 65-byte arrays.
/// `PosixOldUtsName::new` zero-fills every array and then copies the
/// corresponding string into its leading bytes.
#[repr(C)]
#[derive(Debug, Clone, Copy)]
pub struct PosixOldUtsName {
    /// Operating system name (`new` stores `"DragonOS"`).
    pub sysname: [u8; 65],
    /// Node name (`new` stores `"DragonOS"`).
    pub nodename: [u8; 65],
    /// Release string (`new` stores the crate's `CARGO_PKG_VERSION`).
    pub release: [u8; 65],
    /// Version string (`new` stores the crate's `CARGO_PKG_VERSION`).
    pub version: [u8; 65],
    /// Machine / architecture name (`new` selects it by `target_arch`).
    pub machine: [u8; 65],
}
50 
51 impl PosixOldUtsName {
new() -> Self52     pub fn new() -> Self {
53         const SYS_NAME: &[u8] = b"DragonOS";
54         const NODENAME: &[u8] = b"DragonOS";
55         const RELEASE: &[u8] = env!("CARGO_PKG_VERSION").as_bytes();
56         const VERSION: &[u8] = env!("CARGO_PKG_VERSION").as_bytes();
57 
58         #[cfg(target_arch = "x86_64")]
59         const MACHINE: &[u8] = b"x86_64";
60 
61         #[cfg(target_arch = "aarch64")]
62         const MACHINE: &[u8] = b"aarch64";
63 
64         #[cfg(target_arch = "riscv64")]
65         const MACHINE: &[u8] = b"riscv64";
66 
67         let mut r = Self {
68             sysname: [0; 65],
69             nodename: [0; 65],
70             release: [0; 65],
71             version: [0; 65],
72             machine: [0; 65],
73         };
74 
75         r.sysname[0..SYS_NAME.len()].copy_from_slice(SYS_NAME);
76         r.nodename[0..NODENAME.len()].copy_from_slice(NODENAME);
77         r.release[0..RELEASE.len()].copy_from_slice(RELEASE);
78         r.version[0..VERSION.len()].copy_from_slice(VERSION);
79         r.machine[0..MACHINE.len()].copy_from_slice(MACHINE);
80 
81         return r;
82     }
83 }
84 
85 impl Syscall {
fork(frame: &TrapFrame) -> Result<usize, SystemError>86     pub fn fork(frame: &TrapFrame) -> Result<usize, SystemError> {
87         ProcessManager::fork(frame, CloneFlags::empty()).map(|pid| pid.into())
88     }
89 
vfork(frame: &TrapFrame) -> Result<usize, SystemError>90     pub fn vfork(frame: &TrapFrame) -> Result<usize, SystemError> {
91         // 由于Linux vfork需要保证子进程先运行(除非子进程调用execve或者exit),
92         // 而我们目前没有实现这个特性,所以暂时使用fork代替vfork(linux文档表示这样也是也可以的)
93         Self::fork(frame)
94 
95         // 下面是以前的实现,除非我们实现了子进程先运行的特性,否则不要使用,不然会导致父进程数据损坏
96         // ProcessManager::fork(
97         //     frame,
98         //     CloneFlags::CLONE_VM | CloneFlags::CLONE_FS | CloneFlags::CLONE_SIGNAL,
99         // )
100         // .map(|pid| pid.into())
101     }
102 
execve( path: *const u8, argv: *const *const u8, envp: *const *const u8, frame: &mut TrapFrame, ) -> Result<(), SystemError>103     pub fn execve(
104         path: *const u8,
105         argv: *const *const u8,
106         envp: *const *const u8,
107         frame: &mut TrapFrame,
108     ) -> Result<(), SystemError> {
109         // debug!(
110         //     "execve path: {:?}, argv: {:?}, envp: {:?}\n",
111         //     path,
112         //     argv,
113         //     envp
114         // );
115         // debug!(
116         //     "before execve: strong count: {}",
117         //     Arc::strong_count(&ProcessManager::current_pcb())
118         // );
119 
120         if path.is_null() {
121             return Err(SystemError::EINVAL);
122         }
123 
124         let x = || {
125             let path: CString = check_and_clone_cstr(path, Some(MAX_PATHLEN))?;
126             let argv: Vec<CString> = check_and_clone_cstr_array(argv)?;
127             let envp: Vec<CString> = check_and_clone_cstr_array(envp)?;
128             Ok((path, argv, envp))
129         };
130         let (path, argv, envp) = x().inspect_err(|e: &SystemError| {
131             error!("Failed to execve: {:?}", e);
132         })?;
133 
134         let path = path.into_string().map_err(|_| SystemError::EINVAL)?;
135         ProcessManager::current_pcb()
136             .basic_mut()
137             .set_name(ProcessControlBlock::generate_name(&path, &argv));
138 
139         Self::do_execve(path, argv, envp, frame)?;
140 
141         // 关闭设置了O_CLOEXEC的文件描述符
142         let fd_table = ProcessManager::current_pcb().fd_table();
143         fd_table.write().close_on_exec();
144         // debug!(
145         //     "after execve: strong count: {}",
146         //     Arc::strong_count(&ProcessManager::current_pcb())
147         // );
148 
149         return Ok(());
150     }
151 
    /// Loads the binary at `path` into a fresh address space and prepares the
    /// trap frame to start executing it.
    ///
    /// ## Parameters
    ///
    /// - `path`: path of the executable to load
    /// - `argv`: argument strings stored into the process init info
    /// - `envp`: environment strings stored into the process init info
    /// - `regs`: trap frame handed to `arch_do_execve`
    ///
    /// ## Errors
    ///
    /// Propagates errors from `ExecParam::new`, `load_binary_file` and
    /// `arch_do_execve`. If loading the binary fails, the previous user
    /// address space (when there was one) is switched back in before the
    /// error is returned.
    ///
    /// ## Panics
    ///
    /// Panics if the new address space cannot be created, if it has no user
    /// stack, or if the init info cannot be pushed onto the user stack.
    pub fn do_execve(
        path: String,
        argv: Vec<CString>,
        envp: Vec<CString>,
        regs: &mut TrapFrame,
    ) -> Result<(), SystemError> {
        let address_space = AddressSpace::new(true).expect("Failed to create new address space");
        // debug!("to load binary file");
        let mut param = ExecParam::new(path.as_str(), address_space.clone(), ExecParamFlags::EXEC)?;
        // Install the new address space; keep the old one so it can be
        // restored if loading fails.
        let old_vm = do_execve_switch_user_vm(address_space.clone());

        // Load the executable file.
        let load_result = load_binary_file(&mut param).inspect_err(|_| {
            if let Some(old_vm) = old_vm {
                do_execve_switch_user_vm(old_vm);
            }
        })?;

        // debug!("load binary file done");
        // debug!("argv: {:?}, envp: {:?}", argv, envp);
        param.init_info_mut().args = argv;
        param.init_info_mut().envs = envp;

        // Write proc_init_info onto the user stack. This works on an
        // info-only clone of the user stack so the address-space write lock
        // is not held while pushing.
        let mut ustack_message = unsafe {
            address_space
                .write()
                .user_stack_mut()
                .expect("No user stack found")
                .clone_info_only()
        };
        let (user_sp, argv_ptr) = unsafe {
            param
                .init_info()
                .push_at(
                    // Previously the stack was borrowed directly from the address space:
                    // address_space
                    //     .write()
                    //     .user_stack_mut()
                    //     .expect("No user stack found"),
                    &mut ustack_message,
                )
                .expect("Failed to push proc_init_info to user stack")
        };
        // Store the updated stack info back into the address space.
        address_space.write().user_stack = Some(ustack_message);

        Self::arch_do_execve(regs, &param, &load_result, user_sp, argv_ptr)
    }
199 
wait4( pid: i64, wstatus: *mut i32, options: i32, rusage: *mut c_void, ) -> Result<usize, SystemError>200     pub fn wait4(
201         pid: i64,
202         wstatus: *mut i32,
203         options: i32,
204         rusage: *mut c_void,
205     ) -> Result<usize, SystemError> {
206         let options = WaitOption::from_bits(options as u32).ok_or(SystemError::EINVAL)?;
207 
208         let wstatus_buf = if wstatus.is_null() {
209             None
210         } else {
211             Some(UserBufferWriter::new(
212                 wstatus,
213                 core::mem::size_of::<i32>(),
214                 true,
215             )?)
216         };
217 
218         let mut tmp_rusage = if rusage.is_null() {
219             None
220         } else {
221             Some(RUsage::default())
222         };
223 
224         let r = kernel_wait4(pid, wstatus_buf, options, tmp_rusage.as_mut())?;
225 
226         if !rusage.is_null() {
227             let mut rusage_buf = UserBufferWriter::new::<RUsage>(
228                 rusage as *mut RUsage,
229                 core::mem::size_of::<RUsage>(),
230                 true,
231             )?;
232             rusage_buf.copy_one_to_user(&tmp_rusage.unwrap(), 0)?;
233         }
234         return Ok(r);
235     }
236 
237     /// # 退出进程
238     ///
239     /// ## 参数
240     ///
241     /// - status: 退出状态
exit(status: usize) -> !242     pub fn exit(status: usize) -> ! {
243         ProcessManager::exit(status);
244     }
245 
246     /// @brief 获取当前进程的pid
getpid() -> Result<Pid, SystemError>247     pub fn getpid() -> Result<Pid, SystemError> {
248         let current_pcb = ProcessManager::current_pcb();
249         // if let Some(pid_ns) = &current_pcb.get_nsproxy().read().pid_namespace {
250         //     // 获取该进程在命名空间中的 PID
251         //     return Ok(current_pcb.pid_strcut().read().numbers[pid_ns.level].nr);
252         //     // 返回命名空间中的 PID
253         // }
254         // 默认返回 tgid
255         Ok(current_pcb.tgid())
256     }
257 
258     /// @brief 获取指定进程的pgid
259     ///
260     /// @param pid 指定一个进程号
261     ///
262     /// @return 成功,指定进程的进程组id
263     /// @return 错误,不存在该进程
getpgid(mut pid: Pid) -> Result<Pid, SystemError>264     pub fn getpgid(mut pid: Pid) -> Result<Pid, SystemError> {
265         if pid == Pid(0) {
266             let current_pcb = ProcessManager::current_pcb();
267             pid = current_pcb.pid();
268         }
269         let target_proc = ProcessManager::find(pid).ok_or(SystemError::ESRCH)?;
270         return Ok(target_proc.basic().pgid());
271     }
272     /// @brief 获取当前进程的父进程id
273     ///
274     /// 若为initproc则ppid设置为0
getppid() -> Result<Pid, SystemError>275     pub fn getppid() -> Result<Pid, SystemError> {
276         let current_pcb = ProcessManager::current_pcb();
277         return Ok(current_pcb.basic().ppid());
278     }
279 
clone( current_trapframe: &TrapFrame, clone_args: KernelCloneArgs, ) -> Result<usize, SystemError>280     pub fn clone(
281         current_trapframe: &TrapFrame,
282         clone_args: KernelCloneArgs,
283     ) -> Result<usize, SystemError> {
284         let flags = clone_args.flags;
285 
286         let vfork = Arc::new(Completion::new());
287 
288         if flags.contains(CloneFlags::CLONE_PIDFD)
289             && flags.contains(CloneFlags::CLONE_PARENT_SETTID)
290         {
291             return Err(SystemError::EINVAL);
292         }
293 
294         let current_pcb = ProcessManager::current_pcb();
295         let new_kstack = KernelStack::new()?;
296         let name = current_pcb.basic().name().to_string();
297         let pcb = ProcessControlBlock::new(name, new_kstack);
298         // 克隆pcb
299         ProcessManager::copy_process(&current_pcb, &pcb, clone_args, current_trapframe)?;
300         ProcessManager::add_pcb(pcb.clone());
301 
302         // 向procfs注册进程
303         procfs_register_pid(pcb.pid()).unwrap_or_else(|e| {
304             panic!(
305                 "fork: Failed to register pid to procfs, pid: [{:?}]. Error: {:?}",
306                 pcb.pid(),
307                 e
308             )
309         });
310 
311         if flags.contains(CloneFlags::CLONE_VFORK) {
312             pcb.thread.write_irqsave().vfork_done = Some(vfork.clone());
313         }
314 
315         if pcb.thread.read_irqsave().set_child_tid.is_some() {
316             let addr = pcb.thread.read_irqsave().set_child_tid.unwrap();
317             let mut writer =
318                 UserBufferWriter::new(addr.as_ptr::<i32>(), core::mem::size_of::<i32>(), true)?;
319             writer.copy_one_to_user(&(pcb.pid().data() as i32), 0)?;
320         }
321 
322         ProcessManager::wakeup(&pcb).unwrap_or_else(|e| {
323             panic!(
324                 "fork: Failed to wakeup new process, pid: [{:?}]. Error: {:?}",
325                 pcb.pid(),
326                 e
327             )
328         });
329 
330         if flags.contains(CloneFlags::CLONE_VFORK) {
331             // 等待子进程结束或者exec;
332             vfork.wait_for_completion_interruptible()?;
333         }
334 
335         return Ok(pcb.pid().0);
336     }
337 
338     /// 设置线程地址
set_tid_address(ptr: usize) -> Result<usize, SystemError>339     pub fn set_tid_address(ptr: usize) -> Result<usize, SystemError> {
340         verify_area(VirtAddr::new(ptr), core::mem::size_of::<i32>())
341             .map_err(|_| SystemError::EFAULT)?;
342 
343         let pcb = ProcessManager::current_pcb();
344         pcb.thread.write_irqsave().clear_child_tid = Some(VirtAddr::new(ptr));
345         Ok(pcb.pid.0)
346     }
347 
gettid() -> Result<Pid, SystemError>348     pub fn gettid() -> Result<Pid, SystemError> {
349         let pcb = ProcessManager::current_pcb();
350         Ok(pcb.pid)
351     }
352 
getuid() -> Result<usize, SystemError>353     pub fn getuid() -> Result<usize, SystemError> {
354         let pcb = ProcessManager::current_pcb();
355         return Ok(pcb.cred.lock().uid.data());
356     }
357 
getgid() -> Result<usize, SystemError>358     pub fn getgid() -> Result<usize, SystemError> {
359         let pcb = ProcessManager::current_pcb();
360         return Ok(pcb.cred.lock().gid.data());
361     }
362 
geteuid() -> Result<usize, SystemError>363     pub fn geteuid() -> Result<usize, SystemError> {
364         let pcb = ProcessManager::current_pcb();
365         return Ok(pcb.cred.lock().euid.data());
366     }
367 
getegid() -> Result<usize, SystemError>368     pub fn getegid() -> Result<usize, SystemError> {
369         let pcb = ProcessManager::current_pcb();
370         return Ok(pcb.cred.lock().egid.data());
371     }
372 
setuid(uid: usize) -> Result<usize, SystemError>373     pub fn setuid(uid: usize) -> Result<usize, SystemError> {
374         let pcb = ProcessManager::current_pcb();
375         let mut guard = pcb.cred.lock();
376 
377         if guard.uid.data() == 0 {
378             guard.setuid(uid);
379             guard.seteuid(uid);
380             guard.setsuid(uid);
381         } else if uid == guard.uid.data() || uid == guard.suid.data() {
382             guard.seteuid(uid);
383         } else {
384             return Err(SystemError::EPERM);
385         }
386 
387         return Ok(0);
388     }
389 
setgid(gid: usize) -> Result<usize, SystemError>390     pub fn setgid(gid: usize) -> Result<usize, SystemError> {
391         let pcb = ProcessManager::current_pcb();
392         let mut guard = pcb.cred.lock();
393 
394         if guard.egid.data() == 0 {
395             guard.setgid(gid);
396             guard.setegid(gid);
397             guard.setsgid(gid);
398             guard.setfsgid(gid);
399         } else if guard.gid.data() == gid || guard.sgid.data() == gid {
400             guard.setegid(gid);
401             guard.setfsgid(gid);
402         } else {
403             return Err(SystemError::EPERM);
404         }
405 
406         return Ok(0);
407     }
408 
seteuid(euid: usize) -> Result<usize, SystemError>409     pub fn seteuid(euid: usize) -> Result<usize, SystemError> {
410         let pcb = ProcessManager::current_pcb();
411         let mut guard = pcb.cred.lock();
412 
413         if euid == usize::MAX || (euid == guard.euid.data() && euid == guard.fsuid.data()) {
414             return Ok(0);
415         }
416 
417         if euid != usize::MAX {
418             guard.seteuid(euid);
419         }
420 
421         let euid = guard.euid.data();
422         guard.setfsuid(euid);
423 
424         return Ok(0);
425     }
426 
setegid(egid: usize) -> Result<usize, SystemError>427     pub fn setegid(egid: usize) -> Result<usize, SystemError> {
428         let pcb = ProcessManager::current_pcb();
429         let mut guard = pcb.cred.lock();
430 
431         if egid == usize::MAX || (egid == guard.egid.data() && egid == guard.fsgid.data()) {
432             return Ok(0);
433         }
434 
435         if egid != usize::MAX {
436             guard.setegid(egid);
437         }
438 
439         let egid = guard.egid.data();
440         guard.setfsgid(egid);
441 
442         return Ok(0);
443     }
444 
setfsuid(fsuid: usize) -> Result<usize, SystemError>445     pub fn setfsuid(fsuid: usize) -> Result<usize, SystemError> {
446         let fsuid = Kuid::new(fsuid);
447 
448         let pcb = ProcessManager::current_pcb();
449         let mut guard = pcb.cred.lock();
450         let old_fsuid = guard.fsuid;
451 
452         if fsuid == guard.uid || fsuid == guard.euid || fsuid == guard.suid {
453             guard.setfsuid(fsuid.data());
454         }
455 
456         Ok(old_fsuid.data())
457     }
458 
setfsgid(fsgid: usize) -> Result<usize, SystemError>459     pub fn setfsgid(fsgid: usize) -> Result<usize, SystemError> {
460         let fsgid = Kgid::new(fsgid);
461 
462         let pcb = ProcessManager::current_pcb();
463         let mut guard = pcb.cred.lock();
464         let old_fsgid = guard.fsgid;
465 
466         if fsgid == guard.gid || fsgid == guard.egid || fsgid == guard.sgid {
467             guard.setfsgid(fsgid.data());
468         }
469 
470         Ok(old_fsgid.data())
471     }
472 
get_rusage(who: i32, rusage: *mut RUsage) -> Result<usize, SystemError>473     pub fn get_rusage(who: i32, rusage: *mut RUsage) -> Result<usize, SystemError> {
474         let who = RUsageWho::try_from(who)?;
475         let mut writer = UserBufferWriter::new(rusage, core::mem::size_of::<RUsage>(), true)?;
476         let pcb = ProcessManager::current_pcb();
477         let rusage = pcb.get_rusage(who).ok_or(SystemError::EINVAL)?;
478 
479         let ubuf = writer.buffer::<RUsage>(0).unwrap();
480         ubuf.copy_from_slice(&[rusage]);
481 
482         return Ok(0);
483     }
484 
485     /// # 设置资源限制
486     ///
487     /// TODO: 目前暂时不支持设置资源限制,只提供读取默认值的功能
488     ///
489     /// ## 参数
490     ///
491     /// - pid: 进程号
492     /// - resource: 资源类型
493     /// - new_limit: 新的资源限制
494     /// - old_limit: 旧的资源限制
495     ///
496     /// ## 返回值
497     ///
498     /// - 成功,0
499     /// - 如果old_limit不为NULL,则返回旧的资源限制到old_limit
500     ///
prlimit64( _pid: Pid, resource: usize, _new_limit: *const RLimit64, old_limit: *mut RLimit64, ) -> Result<usize, SystemError>501     pub fn prlimit64(
502         _pid: Pid,
503         resource: usize,
504         _new_limit: *const RLimit64,
505         old_limit: *mut RLimit64,
506     ) -> Result<usize, SystemError> {
507         let resource = RLimitID::try_from(resource)?;
508         let mut writer = None;
509 
510         if !old_limit.is_null() {
511             writer = Some(UserBufferWriter::new(
512                 old_limit,
513                 core::mem::size_of::<RLimit64>(),
514                 true,
515             )?);
516         }
517 
518         match resource {
519             RLimitID::Stack => {
520                 if let Some(mut writer) = writer {
521                     let mut rlimit = writer.buffer::<RLimit64>(0).unwrap()[0];
522                     rlimit.rlim_cur = UserStack::DEFAULT_USER_STACK_SIZE as u64;
523                     rlimit.rlim_max = UserStack::DEFAULT_USER_STACK_SIZE as u64;
524                 }
525                 return Ok(0);
526             }
527 
528             RLimitID::Nofile => {
529                 if let Some(mut writer) = writer {
530                     let mut rlimit = writer.buffer::<RLimit64>(0).unwrap()[0];
531                     rlimit.rlim_cur = FileDescriptorVec::PROCESS_MAX_FD as u64;
532                     rlimit.rlim_max = FileDescriptorVec::PROCESS_MAX_FD as u64;
533                 }
534                 return Ok(0);
535             }
536 
537             RLimitID::As | RLimitID::Rss => {
538                 if let Some(mut writer) = writer {
539                     let mut rlimit = writer.buffer::<RLimit64>(0).unwrap()[0];
540                     rlimit.rlim_cur = MMArch::USER_END_VADDR.data() as u64;
541                     rlimit.rlim_max = MMArch::USER_END_VADDR.data() as u64;
542                 }
543                 return Ok(0);
544             }
545 
546             _ => {
547                 return Err(SystemError::ENOSYS);
548             }
549         }
550     }
551 
uname(name: *mut PosixOldUtsName) -> Result<usize, SystemError>552     pub fn uname(name: *mut PosixOldUtsName) -> Result<usize, SystemError> {
553         let mut writer =
554             UserBufferWriter::new(name, core::mem::size_of::<PosixOldUtsName>(), true)?;
555         writer.copy_one_to_user(&PosixOldUtsName::new(), 0)?;
556 
557         return Ok(0);
558     }
559 }
560 
/// Switches the current process to a new user virtual address space.
///
/// Used while executing the `execve` system call to replace the user address
/// space of the calling process.
///
/// # Parameters
/// - `new_vm`: the new user address space, of type `Arc<AddressSpace>`.
///
/// # Returns
/// - The previous user address space, of type `Option<Arc<AddressSpace>>`.
///
/// # Panics
/// If the new address space is not the current one after it has been stored
/// in the PCB, the assertion fails with an error message.
fn do_execve_switch_user_vm(new_vm: Arc<AddressSpace>) -> Option<Arc<AddressSpace>> {
    // Disable interrupts so that no interrupt can enter the scheduler while
    // the address space is being replaced.
    let irq_guard = unsafe { CurrentIrqArch::save_and_disable_irq() };
    let pcb = ProcessManager::current_pcb();
    // log::debug!(
    //     "pid: {:?}  do_execve: path: {:?}, argv: {:?}, envp: {:?}\n",
    //     pcb.pid(),
    //     path,
    //     argv,
    //     envp
    // );

    let mut basic_info = pcb.basic_mut();
    // Keep a reference to the original user address space: releasing it
    // before the page table has been switched could cause a use-after-free.
    let old_address_space = basic_info.user_vm();

    // Clear the user address space currently stored in the PCB.
    unsafe {
        basic_info.set_user_vm(None);
    }
    // Store the new address space in the PCB as the process's address space.
    unsafe {
        basic_info.set_user_vm(Some(new_vm.clone()));
    }

    // to avoid deadlock
    drop(basic_info);

    assert!(
        AddressSpace::is_current(&new_vm),
        "Failed to set address space"
    );
    // debug!("Switch to new address space");

    // Make the new address space's user page table the active one.
    unsafe { new_vm.read().user_mapper.utable.make_current() };

    // Restore the saved interrupt state.
    drop(irq_guard);

    old_address_space
}
614