xref: /DragonOS/kernel/src/mm/syscall.rs (revision 4dd4856f933be0b4624c7f7ffa9e3d0c8c218873)
1 use core::{intrinsics::unlikely, slice::from_raw_parts};
2 
3 use alloc::sync::Arc;
4 use log::error;
5 use system_error::SystemError;
6 
7 use crate::{
8     arch::MMArch,
9     driver::base::block::SeekFrom,
10     ipc::shm::ShmFlags,
11     libs::align::{check_aligned, page_align_up},
12     mm::MemoryManagementArch,
13     syscall::Syscall,
14 };
15 
16 use super::{
17     allocator::page_frame::{PageFrameCount, VirtPageFrame},
18     ucontext::{AddressSpace, DEFAULT_MMAP_MIN_ADDR},
19     verify_area, MsFlags, VirtAddr, VmFlags,
20 };
21 
bitflags! {
    /// Memory protection flags
    pub struct ProtFlags: u64 {
        /// No access bits set.
        const PROT_NONE = 0x0;
        /// Read access (converted to `VmFlags::VM_READ`).
        const PROT_READ = 0x1;
        /// Write access (converted to `VmFlags::VM_WRITE`).
        const PROT_WRITE = 0x2;
        /// Execute access (converted to `VmFlags::VM_EXEC`).
        const PROT_EXEC = 0x4;
    }

    /// Memory mapping flags
    pub struct MapFlags: u64 {
        /// No mapping flag set.
        const MAP_NONE = 0x0;
        /// share changes
        const MAP_SHARED = 0x1;
        /// changes are private
        const MAP_PRIVATE = 0x2;
        /// Interpret addr exactly
        const MAP_FIXED = 0x10;
        /// don't use a file
        const MAP_ANONYMOUS = 0x20;
        // linux-6.1-rc5/include/uapi/asm-generic/mman.h#7
        /// stack-like segment
        const MAP_GROWSDOWN = 0x100;
        /// ETXTBSY
        const MAP_DENYWRITE = 0x800;
        /// Mark it as an executable
        const MAP_EXECUTABLE = 0x1000;
        /// Pages are locked
        const MAP_LOCKED = 0x2000;
        /// don't check for reservations
        const MAP_NORESERVE = 0x4000;
        /// populate (prefault) pagetables
        const MAP_POPULATE = 0x8000;
        /// do not block on IO
        const MAP_NONBLOCK = 0x10000;
        /// give out an address that is best suited for process/thread stacks
        const MAP_STACK = 0x20000;
        /// create a huge page mapping
        const MAP_HUGETLB = 0x40000;
        /// perform synchronous page faults for the mapping
        const MAP_SYNC = 0x80000;
        /// MAP_FIXED which doesn't unmap underlying mapping
        const MAP_FIXED_NOREPLACE = 0x100000;

        /// For anonymous mmap, memory could be uninitialized
        const MAP_UNINITIALIZED = 0x4000000;
    }

    /// Memory mremapping flags
    pub struct MremapFlags: u8 {
        /// `Syscall::mremap` requires this flag whenever `MREMAP_FIXED` or
        /// `MREMAP_DONTUNMAP` is given.
        const MREMAP_MAYMOVE = 1;
        /// Remap to the caller-supplied new address; `Syscall::mremap` rejects it
        /// without `MREMAP_MAYMOVE` or with a zero new address.
        const MREMAP_FIXED = 2;
        /// Keep the old mapping: `Syscall::mremap` skips unmapping the old range and
        /// requires `MREMAP_MAYMOVE` plus equal old and new lengths.
        const MREMAP_DONTUNMAP = 4;
    }


    /// Advice values accepted by `Syscall::madvise`.
    ///
    /// NOTE(review): the values below are sequential advice numbers, not independent
    /// bits (e.g. 3 == 1 | 2), so `from_bits` also accepts values that are merely
    /// bitwise unions of other constants — confirm this is intended.
    pub struct MadvFlags: u64 {
        /// Default behavior: the system performs a moderate amount of read-ahead and
        /// write-behind; suited to general read patterns.
        const MADV_NORMAL = 0;
        /// Random access: the system tries to minimize the amount of data read;
        /// suited to random access patterns.
        const MADV_RANDOM = 1;
        /// Sequential access: the system reads ahead aggressively and pages may be
        /// freed soon after they are accessed; suited to sequential reads.
        const MADV_SEQUENTIAL = 2;
        /// Ask the system to read ahead certain pages so the application has the
        /// data ready in advance.
        const MADV_WILLNEED = 3;
        /// Tell the system the application no longer needs certain pages; the kernel
        /// may release the associated resources.
        const MADV_DONTNEED = 4;

        /// Mark the pages in the range for lazy freeing; the actual free is deferred
        /// until memory pressure occurs.
        const MADV_FREE = 8;
        /// The application requests freeing the pages in the range together with
        /// their backing store.
        const MADV_REMOVE = 9;
        /// Exclude the range on fork.
        const MADV_DONTFORK = 10;
        /// Undo MADV_DONTFORK: the range is no longer excluded on fork.
        const MADV_DOFORK = 11;
        /// Simulate a memory hardware error, triggering the memory-error handler.
        const MADV_HWPOISON = 100;
        /// Try to soft-offline the given memory range.
        const MADV_SOFT_OFFLINE = 101;

        /// The application suggests that the kernel try to merge pages with
        /// identical content within the range.
        const MADV_MERGEABLE = 12;
        /// Undo MADV_MERGEABLE: pages are no longer merged.
        const MADV_UNMERGEABLE = 13;

        /// The application would like the range to be backed by transparent huge pages.
        const MADV_HUGEPAGE = 14;
        /// Mark the range as not worth backing with transparent huge pages.
        const MADV_NOHUGEPAGE = 15;

        /// The application requests that pages in the range be excluded from core dumps.
        const MADV_DONTDUMP = 16;
        /// Undo MADV_DONTDUMP: the pages are no longer excluded from core dumps.
        const MADV_DODUMP = 17;

        /// On fork, fill the child's copy of this region with zeros.
        const MADV_WIPEONFORK = 18;
        /// Undo `MADV_WIPEONFORK`: the child's memory is no longer zero-filled on fork.
        const MADV_KEEPONFORK = 19;

        /// The application will not use this memory soon; the kernel deactivates the
        /// pages so they can be reclaimed easily under memory pressure.
        const MADV_COLD = 20;
        /// The application will not use this memory soon; the kernel pages it out
        /// immediately.
        const MADV_PAGEOUT = 21;

        /// Prefault page tables (readable) by triggering read faults.
        const MADV_POPULATE_READ = 22;
        /// Prefault page tables (writable) by triggering write faults.
        const MADV_POPULATE_WRITE = 23;

        /// Like `MADV_DONTNEED`, but also releases locked pages.
        const MADV_DONTNEED_LOCKED = 24;

        /// Synchronously collapse pages into a new transparent huge page.
        const MADV_COLLAPSE = 25;

    }
}
141 
142 impl From<MapFlags> for VmFlags {
143     fn from(map_flags: MapFlags) -> Self {
144         let mut vm_flags = VmFlags::VM_NONE;
145 
146         if map_flags.contains(MapFlags::MAP_GROWSDOWN) {
147             vm_flags |= VmFlags::VM_GROWSDOWN;
148         }
149 
150         if map_flags.contains(MapFlags::MAP_LOCKED) {
151             vm_flags |= VmFlags::VM_LOCKED;
152         }
153 
154         if map_flags.contains(MapFlags::MAP_SYNC) {
155             vm_flags |= VmFlags::VM_SYNC;
156         }
157 
158         if map_flags.contains(MapFlags::MAP_SHARED) {
159             vm_flags |= VmFlags::VM_SHARED;
160         }
161 
162         vm_flags
163     }
164 }
165 
166 impl From<ProtFlags> for VmFlags {
167     fn from(prot_flags: ProtFlags) -> Self {
168         let mut vm_flags = VmFlags::VM_NONE;
169 
170         if prot_flags.contains(ProtFlags::PROT_READ) {
171             vm_flags |= VmFlags::VM_READ;
172         }
173 
174         if prot_flags.contains(ProtFlags::PROT_WRITE) {
175             vm_flags |= VmFlags::VM_WRITE;
176         }
177 
178         if prot_flags.contains(ProtFlags::PROT_EXEC) {
179             vm_flags |= VmFlags::VM_EXEC;
180         }
181 
182         vm_flags
183     }
184 }
185 
186 impl From<ShmFlags> for VmFlags {
187     fn from(shm_flags: ShmFlags) -> Self {
188         let mut vm_flags = VmFlags::VM_NONE;
189 
190         if shm_flags.contains(ShmFlags::SHM_RDONLY) {
191             vm_flags |= VmFlags::VM_READ;
192         } else {
193             vm_flags |= VmFlags::VM_READ | VmFlags::VM_WRITE;
194         }
195 
196         if shm_flags.contains(ShmFlags::SHM_EXEC) {
197             vm_flags |= VmFlags::VM_EXEC;
198         }
199 
200         if shm_flags.contains(ShmFlags::SHM_HUGETLB) {
201             vm_flags |= VmFlags::VM_HUGETLB;
202         }
203 
204         vm_flags
205     }
206 }
207 
208 impl From<VmFlags> for MapFlags {
209     fn from(value: VmFlags) -> Self {
210         let mut map_flags = MapFlags::MAP_NONE;
211 
212         if value.contains(VmFlags::VM_GROWSDOWN) {
213             map_flags |= MapFlags::MAP_GROWSDOWN;
214         }
215 
216         if value.contains(VmFlags::VM_LOCKED) {
217             map_flags |= MapFlags::MAP_LOCKED;
218         }
219 
220         if value.contains(VmFlags::VM_SYNC) {
221             map_flags |= MapFlags::MAP_SYNC;
222         }
223 
224         if value.contains(VmFlags::VM_MAYSHARE) {
225             map_flags |= MapFlags::MAP_SHARED;
226         }
227 
228         map_flags
229     }
230 }
231 
232 impl From<VmFlags> for ProtFlags {
233     fn from(value: VmFlags) -> Self {
234         let mut prot_flags = ProtFlags::PROT_NONE;
235 
236         if value.contains(VmFlags::VM_READ) {
237             prot_flags |= ProtFlags::PROT_READ;
238         }
239 
240         if value.contains(VmFlags::VM_WRITE) {
241             prot_flags |= ProtFlags::PROT_WRITE;
242         }
243 
244         if value.contains(VmFlags::VM_EXEC) {
245             prot_flags |= ProtFlags::PROT_EXEC;
246         }
247 
248         prot_flags
249     }
250 }
251 
252 impl Syscall {
253     pub fn brk(new_addr: VirtAddr) -> Result<VirtAddr, SystemError> {
254         // debug!("brk: new_addr={:?}", new_addr);
255         let address_space = AddressSpace::current()?;
256         let mut address_space = address_space.write();
257 
258         if new_addr < address_space.brk_start || new_addr >= MMArch::USER_END_VADDR {
259             return Ok(address_space.brk);
260         }
261         if new_addr == address_space.brk {
262             return Ok(address_space.brk);
263         }
264 
265         unsafe {
266             address_space
267                 .set_brk(VirtAddr::new(page_align_up(new_addr.data())))
268                 .ok();
269 
270             return Ok(address_space.sbrk(0).unwrap());
271         }
272     }
273 
274     pub fn sbrk(incr: isize) -> Result<VirtAddr, SystemError> {
275         let address_space = AddressSpace::current()?;
276         assert!(address_space.read().user_mapper.utable.is_current());
277         let mut address_space = address_space.write();
278         let r = unsafe { address_space.sbrk(incr) };
279 
280         return r;
281     }
282 
283     /// ## mmap系统调用
284     ///
285     /// 该函数的实现参考了Linux内核的实现,但是并不完全相同。因为有些功能咱们还没实现
286     ///
287     /// ## 参数
288     ///
289     /// - `start_vaddr`:映射的起始地址
290     /// - `len`:映射的长度
291     /// - `prot`:保护标志
292     /// - `flags`:映射标志
293     /// - `fd`:文件描述符(暂时不支持)
294     /// - `offset`:文件偏移量 (暂时不支持)
295     ///
296     /// ## 返回值
297     ///
298     /// 成功时返回映射的起始地址,失败时返回错误码
299     pub fn mmap(
300         start_vaddr: VirtAddr,
301         len: usize,
302         prot_flags: usize,
303         map_flags: usize,
304         fd: i32,
305         offset: usize,
306     ) -> Result<usize, SystemError> {
307         let map_flags = MapFlags::from_bits_truncate(map_flags as u64);
308         let prot_flags = ProtFlags::from_bits_truncate(prot_flags as u64);
309 
310         if start_vaddr < VirtAddr::new(DEFAULT_MMAP_MIN_ADDR)
311             && map_flags.contains(MapFlags::MAP_FIXED)
312         {
313             error!(
314                 "mmap: MAP_FIXED is not supported for address below {}",
315                 DEFAULT_MMAP_MIN_ADDR
316             );
317             return Err(SystemError::EINVAL);
318         }
319 
320         // 暂时不支持巨页映射
321         if map_flags.contains(MapFlags::MAP_HUGETLB) {
322             error!("mmap: not support huge page mapping");
323             return Err(SystemError::ENOSYS);
324         }
325         let current_address_space = AddressSpace::current()?;
326         let start_page = if map_flags.contains(MapFlags::MAP_ANONYMOUS) {
327             // 匿名映射
328             current_address_space.write().map_anonymous(
329                 start_vaddr,
330                 len,
331                 prot_flags,
332                 map_flags,
333                 true,
334                 false,
335             )?
336         } else {
337             // 文件映射
338             current_address_space.write().file_mapping(
339                 start_vaddr,
340                 len,
341                 prot_flags,
342                 map_flags,
343                 fd,
344                 offset,
345                 true,
346                 false,
347             )?
348         };
349 
350         return Ok(start_page.virt_address().data());
351     }
352 
353     /// ## mremap系统调用
354     ///
355     ///
356     /// ## 参数
357     ///
358     /// - `old_vaddr`:原映射的起始地址
359     /// - `old_len`:原映射的长度
360     /// - `new_len`:重新映射的长度
361     /// - `mremap_flags`:重映射标志
362     /// - `new_vaddr`:重新映射的起始地址
363     ///
364     /// ## 返回值
365     ///
366     /// 成功时返回重映射的起始地址,失败时返回错误码
367     pub fn mremap(
368         old_vaddr: VirtAddr,
369         old_len: usize,
370         new_len: usize,
371         mremap_flags: MremapFlags,
372         new_vaddr: VirtAddr,
373     ) -> Result<usize, SystemError> {
374         // 需要重映射到新内存区域的情况下,必须包含MREMAP_MAYMOVE并且指定新地址
375         if mremap_flags.contains(MremapFlags::MREMAP_FIXED)
376             && (!mremap_flags.contains(MremapFlags::MREMAP_MAYMOVE)
377                 || new_vaddr == VirtAddr::new(0))
378         {
379             return Err(SystemError::EINVAL);
380         }
381 
382         // 不取消旧映射的情况下,必须包含MREMAP_MAYMOVE并且新内存大小等于旧内存大小
383         if mremap_flags.contains(MremapFlags::MREMAP_DONTUNMAP)
384             && (!mremap_flags.contains(MremapFlags::MREMAP_MAYMOVE) || old_len != new_len)
385         {
386             return Err(SystemError::EINVAL);
387         }
388 
389         // 旧内存地址必须对齐
390         if !old_vaddr.check_aligned(MMArch::PAGE_SIZE) {
391             return Err(SystemError::EINVAL);
392         }
393 
394         // 将old_len、new_len 对齐页面大小
395         let old_len = page_align_up(old_len);
396         let new_len = page_align_up(new_len);
397 
398         // 不允许重映射内存区域大小为0
399         if new_len == 0 {
400             return Err(SystemError::EINVAL);
401         }
402 
403         let current_address_space = AddressSpace::current()?;
404         let vma = current_address_space.read().mappings.contains(old_vaddr);
405         if vma.is_none() {
406             return Err(SystemError::EINVAL);
407         }
408         let vma = vma.unwrap();
409         let vm_flags = *vma.lock_irqsave().vm_flags();
410 
411         // 暂时不支持巨页映射
412         if vm_flags.contains(VmFlags::VM_HUGETLB) {
413             error!("mmap: not support huge page mapping");
414             return Err(SystemError::ENOSYS);
415         }
416 
417         // 缩小旧内存映射区域
418         if old_len > new_len {
419             Self::munmap(old_vaddr + new_len, old_len - new_len)?;
420             return Ok(old_vaddr.data());
421         }
422 
423         // 重映射到新内存区域
424         let r = current_address_space.write().mremap(
425             old_vaddr,
426             old_len,
427             new_len,
428             mremap_flags,
429             new_vaddr,
430             vm_flags,
431         )?;
432 
433         if !mremap_flags.contains(MremapFlags::MREMAP_DONTUNMAP) {
434             Self::munmap(old_vaddr, old_len)?;
435         }
436 
437         return Ok(r.data());
438     }
439 
440     /// ## munmap系统调用
441     ///
442     /// ## 参数
443     ///
444     /// - `start_vaddr`:取消映射的起始地址(已经对齐到页)
445     /// - `len`:取消映射的字节数(已经对齐到页)
446     ///
447     /// ## 返回值
448     ///
449     /// 成功时返回0,失败时返回错误码
450     pub fn munmap(start_vaddr: VirtAddr, len: usize) -> Result<usize, SystemError> {
451         assert!(start_vaddr.check_aligned(MMArch::PAGE_SIZE));
452         assert!(check_aligned(len, MMArch::PAGE_SIZE));
453 
454         if unlikely(verify_area(start_vaddr, len).is_err()) {
455             return Err(SystemError::EINVAL);
456         }
457         if unlikely(len == 0) {
458             return Err(SystemError::EINVAL);
459         }
460 
461         let current_address_space: Arc<AddressSpace> = AddressSpace::current()?;
462         let start_frame = VirtPageFrame::new(start_vaddr);
463         let page_count = PageFrameCount::new(len / MMArch::PAGE_SIZE);
464 
465         current_address_space
466             .write()
467             .munmap(start_frame, page_count)
468             .map_err(|_| SystemError::EINVAL)?;
469 
470         return Ok(0);
471     }
472 
473     /// ## mprotect系统调用
474     ///
475     /// ## 参数
476     ///
477     /// - `start_vaddr`:起始地址(已经对齐到页)
478     /// - `len`:长度(已经对齐到页)
479     /// - `prot_flags`:保护标志
480     pub fn mprotect(
481         start_vaddr: VirtAddr,
482         len: usize,
483         prot_flags: usize,
484     ) -> Result<usize, SystemError> {
485         assert!(start_vaddr.check_aligned(MMArch::PAGE_SIZE));
486         assert!(check_aligned(len, MMArch::PAGE_SIZE));
487 
488         if unlikely(verify_area(start_vaddr, len).is_err()) {
489             return Err(SystemError::EINVAL);
490         }
491         if unlikely(len == 0) {
492             return Err(SystemError::EINVAL);
493         }
494 
495         let prot_flags = ProtFlags::from_bits(prot_flags as u64).ok_or(SystemError::EINVAL)?;
496 
497         let current_address_space: Arc<AddressSpace> = AddressSpace::current()?;
498         let start_frame = VirtPageFrame::new(start_vaddr);
499         let page_count = PageFrameCount::new(len / MMArch::PAGE_SIZE);
500 
501         current_address_space
502             .write()
503             .mprotect(start_frame, page_count, prot_flags)
504             .map_err(|_| SystemError::EINVAL)?;
505         return Ok(0);
506     }
507 
508     /// ## madvise系统调用
509     ///
510     /// ## 参数
511     ///
512     /// - `start_vaddr`:起始地址(已经对齐到页)
513     /// - `len`:长度(已经对齐到页)
514     /// - `madv_flags`:建议标志
515     pub fn madvise(
516         start_vaddr: VirtAddr,
517         len: usize,
518         madv_flags: usize,
519     ) -> Result<usize, SystemError> {
520         if !start_vaddr.check_aligned(MMArch::PAGE_SIZE) || !check_aligned(len, MMArch::PAGE_SIZE) {
521             return Err(SystemError::EINVAL);
522         }
523 
524         if unlikely(verify_area(start_vaddr, len).is_err()) {
525             return Err(SystemError::EINVAL);
526         }
527         if unlikely(len == 0) {
528             return Err(SystemError::EINVAL);
529         }
530 
531         let madv_flags = MadvFlags::from_bits(madv_flags as u64).ok_or(SystemError::EINVAL)?;
532 
533         let current_address_space: Arc<AddressSpace> = AddressSpace::current()?;
534         let start_frame = VirtPageFrame::new(start_vaddr);
535         let page_count = PageFrameCount::new(len / MMArch::PAGE_SIZE);
536 
537         current_address_space
538             .write()
539             .madvise(start_frame, page_count, madv_flags)
540             .map_err(|_| SystemError::EINVAL)?;
541         return Ok(0);
542     }
543 
544     /// ## msync系统调用
545     ///
546     /// ## 参数
547     ///
548     /// - `start`:起始地址(已经对齐到页)
549     /// - `len`:长度(已经对齐到页)
550     /// - `flags`:标志
551     pub fn msync(start: VirtAddr, len: usize, flags: usize) -> Result<usize, SystemError> {
552         if !start.check_aligned(MMArch::PAGE_SIZE) || !check_aligned(len, MMArch::PAGE_SIZE) {
553             return Err(SystemError::EINVAL);
554         }
555 
556         if unlikely(verify_area(start, len).is_err()) {
557             return Err(SystemError::EINVAL);
558         }
559         if unlikely(len == 0) {
560             return Err(SystemError::EINVAL);
561         }
562 
563         let mut start = start.data();
564         let end = start + len;
565         let flags = MsFlags::from_bits_truncate(flags);
566         let mut unmapped_error = Ok(0);
567 
568         if !flags.intersects(MsFlags::MS_ASYNC | MsFlags::MS_INVALIDATE | MsFlags::MS_SYNC) {
569             return Err(SystemError::EINVAL);
570         }
571 
572         if flags.contains(MsFlags::MS_ASYNC | MsFlags::MS_SYNC) {
573             return Err(SystemError::EINVAL);
574         }
575 
576         if end < start {
577             return Err(SystemError::ENOMEM);
578         }
579 
580         if start == end {
581             return Ok(0);
582         }
583 
584         let current_address_space = AddressSpace::current()?;
585         let mut err = Err(SystemError::ENOMEM);
586         let mut next_vma = current_address_space
587             .read()
588             .mappings
589             .find_nearest(VirtAddr::new(start));
590         loop {
591             if let Some(vma) = next_vma.clone() {
592                 let guard = vma.lock_irqsave();
593                 let vm_start = guard.region().start().data();
594                 let vm_end = guard.region().end().data();
595                 if start < vm_start {
596                     if flags == MsFlags::MS_ASYNC {
597                         break;
598                     }
599                     start = vm_start;
600                     if start >= vm_end {
601                         break;
602                     }
603                     unmapped_error = Err(SystemError::ENOMEM);
604                 }
605                 let vm_flags = *guard.vm_flags();
606                 if flags.contains(MsFlags::MS_INVALIDATE) && vm_flags.contains(VmFlags::VM_LOCKED) {
607                     err = Err(SystemError::EBUSY);
608                     break;
609                 }
610                 let file = guard.vm_file();
611                 let fstart = (start - vm_start)
612                     + (guard.file_page_offset().unwrap_or(0) << MMArch::PAGE_SHIFT);
613                 let fend = fstart + (core::cmp::min(end, vm_end) - start) - 1;
614                 let old_start = start;
615                 start = vm_end;
616                 // log::info!("flags: {:?}", flags);
617                 // log::info!("vm_flags: {:?}", vm_flags);
618                 // log::info!("file: {:?}", file);
619                 if flags.contains(MsFlags::MS_SYNC) && vm_flags.contains(VmFlags::VM_SHARED) {
620                     if let Some(file) = file {
621                         let old_pos = file.lseek(SeekFrom::SeekCurrent(0)).unwrap();
622                         file.lseek(SeekFrom::SeekSet(fstart as i64)).unwrap();
623                         err = file.write(len, unsafe {
624                             from_raw_parts(old_start as *mut u8, fend - fstart + 1)
625                         });
626                         file.lseek(SeekFrom::SeekSet(old_pos as i64)).unwrap();
627                         if err.is_err() {
628                             break;
629                         } else if start >= end {
630                             err = unmapped_error;
631                             break;
632                         }
633                         next_vma = current_address_space
634                             .read()
635                             .mappings
636                             .find_nearest(VirtAddr::new(start));
637                     }
638                 } else {
639                     if start >= end {
640                         err = unmapped_error;
641                         break;
642                     }
643                     next_vma = current_address_space
644                         .read()
645                         .mappings
646                         .find_nearest(VirtAddr::new(vm_end));
647                 }
648             } else {
649                 return Err(SystemError::ENOMEM);
650             }
651         }
652         return err;
653     }
654 }
655