xref: /DragonOS/kernel/src/net/syscall.rs (revision 338f6903262c5031abad3c8e361813355a27fcdb)
1 use core::cmp::min;
2 
3 use alloc::{boxed::Box, sync::Arc};
4 use num_traits::{FromPrimitive, ToPrimitive};
5 use smoltcp::wire;
6 use system_error::SystemError;
7 
8 use crate::{
9     filesystem::vfs::{
10         file::{File, FileMode},
11         syscall::{IoVec, IoVecs},
12     },
13     libs::spinlock::SpinLockGuard,
14     mm::{verify_area, VirtAddr},
15     net::socket::{AddressFamily, SOL_SOCKET},
16     process::ProcessManager,
17     syscall::Syscall,
18 };
19 
20 use super::{
21     socket::{new_socket, PosixSocketType, Socket, SocketHandleItem, SocketInode, HANDLE_MAP},
22     Endpoint, Protocol, ShutdownType,
23 };
24 
/// Flags for socket, socketpair, accept4.
///
/// `SOCK_CLOEXEC` is defined here as the very same value as `FileMode::O_CLOEXEC`.
const SOCK_CLOEXEC: FileMode = FileMode::O_CLOEXEC;
/// `SOCK_NONBLOCK` is defined here as the very same value as `FileMode::O_NONBLOCK`.
const SOCK_NONBLOCK: FileMode = FileMode::O_NONBLOCK;
28 
29 impl Syscall {
30     /// @brief sys_socket系统调用的实际执行函数
31     ///
32     /// @param address_family 地址族
33     /// @param socket_type socket类型
34     /// @param protocol 传输协议
35     pub fn socket(
36         address_family: usize,
37         socket_type: usize,
38         protocol: usize,
39     ) -> Result<usize, SystemError> {
40         let address_family = AddressFamily::try_from(address_family as u16)?;
41         let socket_type = PosixSocketType::try_from((socket_type & 0xf) as u8)?;
42         let protocol = Protocol::from(protocol as u8);
43 
44         let socket = new_socket(address_family, socket_type, protocol)?;
45 
46         let handle_item = SocketHandleItem::new(&socket);
47         HANDLE_MAP
48             .write_irqsave()
49             .insert(socket.socket_handle(), handle_item);
50 
51         let socketinode: Arc<SocketInode> = SocketInode::new(socket);
52         let f = File::new(socketinode, FileMode::O_RDWR)?;
53         // 把socket添加到当前进程的文件描述符表中
54         let binding = ProcessManager::current_pcb().fd_table();
55         let mut fd_table_guard = binding.write();
56         let fd = fd_table_guard.alloc_fd(f, None).map(|x| x as usize);
57         drop(fd_table_guard);
58         return fd;
59     }
60 
61     /// # sys_socketpair系统调用的实际执行函数
62     ///
63     /// ## 参数
64     /// - `address_family`: 地址族
65     /// - `socket_type`: socket类型
66     /// - `protocol`: 传输协议
67     /// - `fds`: 用于返回文件描述符的数组
68     pub fn socketpair(
69         address_family: usize,
70         socket_type: usize,
71         protocol: usize,
72         fds: &mut [i32],
73     ) -> Result<usize, SystemError> {
74         let address_family = AddressFamily::try_from(address_family as u16)?;
75         let socket_type = PosixSocketType::try_from((socket_type & 0xf) as u8)?;
76         let protocol = Protocol::from(protocol as u8);
77 
78         let mut socket0 = new_socket(address_family, socket_type, protocol)?;
79         let mut socket1 = new_socket(address_family, socket_type, protocol)?;
80 
81         socket0
82             .socketpair_ops()
83             .unwrap()
84             .socketpair(&mut socket0, &mut socket1);
85 
86         let binding = ProcessManager::current_pcb().fd_table();
87         let mut fd_table_guard = binding.write();
88 
89         let mut alloc_fd = |socket: Box<dyn Socket>| -> Result<i32, SystemError> {
90             let socketinode = SocketInode::new(socket);
91             let file = File::new(socketinode, FileMode::O_RDWR)?;
92             fd_table_guard.alloc_fd(file, None)
93         };
94 
95         fds[0] = alloc_fd(socket0)?;
96         fds[1] = alloc_fd(socket1)?;
97 
98         drop(fd_table_guard);
99         Ok(0)
100     }
101 
102     /// @brief sys_setsockopt系统调用的实际执行函数
103     ///
104     /// @param fd 文件描述符
105     /// @param level 选项级别
106     /// @param optname 选项名称
107     /// @param optval 选项值
108     /// @param optlen optval缓冲区长度
109     pub fn setsockopt(
110         fd: usize,
111         level: usize,
112         optname: usize,
113         optval: &[u8],
114     ) -> Result<usize, SystemError> {
115         let socket_inode: Arc<SocketInode> = ProcessManager::current_pcb()
116             .get_socket(fd as i32)
117             .ok_or(SystemError::EBADF)?;
118         // 获取内层的socket(真正的数据)
119         let socket: SpinLockGuard<Box<dyn Socket>> = socket_inode.inner();
120         return socket.setsockopt(level, optname, optval).map(|_| 0);
121     }
122 
123     /// @brief sys_getsockopt系统调用的实际执行函数
124     ///
125     /// 参考:https://man7.org/linux/man-pages/man2/setsockopt.2.html
126     ///
127     /// @param fd 文件描述符
128     /// @param level 选项级别
129     /// @param optname 选项名称
130     /// @param optval 返回的选项值
131     /// @param optlen 返回的optval缓冲区长度
132     pub fn getsockopt(
133         fd: usize,
134         level: usize,
135         optname: usize,
136         optval: *mut u8,
137         optlen: *mut u32,
138     ) -> Result<usize, SystemError> {
139         // 获取socket
140         let optval = optval as *mut u32;
141         let binding: Arc<SocketInode> = ProcessManager::current_pcb()
142             .get_socket(fd as i32)
143             .ok_or(SystemError::EBADF)?;
144         let socket = binding.inner();
145 
146         if level as u8 == SOL_SOCKET {
147             let optname = PosixSocketOption::try_from(optname as i32)
148                 .map_err(|_| SystemError::ENOPROTOOPT)?;
149             match optname {
150                 PosixSocketOption::SO_SNDBUF => {
151                     // 返回发送缓冲区大小
152                     unsafe {
153                         *optval = socket.metadata()?.tx_buf_size as u32;
154                         *optlen = core::mem::size_of::<u32>() as u32;
155                     }
156                     return Ok(0);
157                 }
158                 PosixSocketOption::SO_RCVBUF => {
159                     let optval = optval as *mut u32;
160                     // 返回默认的接收缓冲区大小
161                     unsafe {
162                         *optval = socket.metadata()?.rx_buf_size as u32;
163                         *optlen = core::mem::size_of::<u32>() as u32;
164                     }
165                     return Ok(0);
166                 }
167                 _ => {
168                     return Err(SystemError::ENOPROTOOPT);
169                 }
170             }
171         }
172         drop(socket);
173 
174         // To manipulate options at any other level the
175         // protocol number of the appropriate protocol controlling the
176         // option is supplied.  For example, to indicate that an option is
177         // to be interpreted by the TCP protocol, level should be set to the
178         // protocol number of TCP.
179 
180         let posix_protocol =
181             PosixIpProtocol::try_from(level as u16).map_err(|_| SystemError::ENOPROTOOPT)?;
182         if posix_protocol == PosixIpProtocol::TCP {
183             let optname = PosixTcpSocketOptions::try_from(optname as i32)
184                 .map_err(|_| SystemError::ENOPROTOOPT)?;
185             match optname {
186                 PosixTcpSocketOptions::Congestion => return Ok(0),
187                 _ => {
188                     return Err(SystemError::ENOPROTOOPT);
189                 }
190             }
191         }
192         return Err(SystemError::ENOPROTOOPT);
193     }
194 
195     /// @brief sys_connect系统调用的实际执行函数
196     ///
197     /// @param fd 文件描述符
198     /// @param addr SockAddr
199     /// @param addrlen 地址长度
200     ///
201     /// @return 成功返回0,失败返回错误码
202     pub fn connect(fd: usize, addr: *const SockAddr, addrlen: usize) -> Result<usize, SystemError> {
203         let endpoint: Endpoint = SockAddr::to_endpoint(addr, addrlen)?;
204         let socket: Arc<SocketInode> = ProcessManager::current_pcb()
205             .get_socket(fd as i32)
206             .ok_or(SystemError::EBADF)?;
207         let mut socket = unsafe { socket.inner_no_preempt() };
208         // kdebug!("connect to {:?}...", endpoint);
209         socket.connect(endpoint)?;
210         return Ok(0);
211     }
212 
213     /// @brief sys_bind系统调用的实际执行函数
214     ///
215     /// @param fd 文件描述符
216     /// @param addr SockAddr
217     /// @param addrlen 地址长度
218     ///
219     /// @return 成功返回0,失败返回错误码
220     pub fn bind(fd: usize, addr: *const SockAddr, addrlen: usize) -> Result<usize, SystemError> {
221         let endpoint: Endpoint = SockAddr::to_endpoint(addr, addrlen)?;
222         let socket: Arc<SocketInode> = ProcessManager::current_pcb()
223             .get_socket(fd as i32)
224             .ok_or(SystemError::EBADF)?;
225         let mut socket = unsafe { socket.inner_no_preempt() };
226         socket.bind(endpoint)?;
227         return Ok(0);
228     }
229 
230     /// @brief sys_sendto系统调用的实际执行函数
231     ///
232     /// @param fd 文件描述符
233     /// @param buf 发送缓冲区
234     /// @param flags 标志
235     /// @param addr SockAddr
236     /// @param addrlen 地址长度
237     ///
238     /// @return 成功返回发送的字节数,失败返回错误码
239     pub fn sendto(
240         fd: usize,
241         buf: &[u8],
242         _flags: u32,
243         addr: *const SockAddr,
244         addrlen: usize,
245     ) -> Result<usize, SystemError> {
246         let endpoint = if addr.is_null() {
247             None
248         } else {
249             Some(SockAddr::to_endpoint(addr, addrlen)?)
250         };
251 
252         let socket: Arc<SocketInode> = ProcessManager::current_pcb()
253             .get_socket(fd as i32)
254             .ok_or(SystemError::EBADF)?;
255         let socket = unsafe { socket.inner_no_preempt() };
256         return socket.write(buf, endpoint);
257     }
258 
259     /// @brief sys_recvfrom系统调用的实际执行函数
260     ///
261     /// @param fd 文件描述符
262     /// @param buf 接收缓冲区
263     /// @param flags 标志
264     /// @param addr SockAddr
265     /// @param addrlen 地址长度
266     ///
267     /// @return 成功返回接收的字节数,失败返回错误码
268     pub fn recvfrom(
269         fd: usize,
270         buf: &mut [u8],
271         _flags: u32,
272         addr: *mut SockAddr,
273         addrlen: *mut u32,
274     ) -> Result<usize, SystemError> {
275         let socket: Arc<SocketInode> = ProcessManager::current_pcb()
276             .get_socket(fd as i32)
277             .ok_or(SystemError::EBADF)?;
278         let mut socket = unsafe { socket.inner_no_preempt() };
279 
280         let (n, endpoint) = socket.read(buf);
281         drop(socket);
282 
283         let n: usize = n?;
284 
285         // 如果有地址信息,将地址信息写入用户空间
286         if !addr.is_null() {
287             let sockaddr_in = SockAddr::from(endpoint);
288             unsafe {
289                 sockaddr_in.write_to_user(addr, addrlen)?;
290             }
291         }
292         return Ok(n);
293     }
294 
295     /// @brief sys_recvmsg系统调用的实际执行函数
296     ///
297     /// @param fd 文件描述符
298     /// @param msg MsgHdr
299     /// @param flags 标志,暂时未使用
300     ///
301     /// @return 成功返回接收的字节数,失败返回错误码
302     pub fn recvmsg(fd: usize, msg: &mut MsgHdr, _flags: u32) -> Result<usize, SystemError> {
303         // 检查每个缓冲区地址是否合法,生成iovecs
304         let mut iovs = unsafe { IoVecs::from_user(msg.msg_iov, msg.msg_iovlen, true)? };
305 
306         let socket: Arc<SocketInode> = ProcessManager::current_pcb()
307             .get_socket(fd as i32)
308             .ok_or(SystemError::EBADF)?;
309         let mut socket = unsafe { socket.inner_no_preempt() };
310 
311         let mut buf = iovs.new_buf(true);
312         // 从socket中读取数据
313         let (n, endpoint) = socket.read(&mut buf);
314         drop(socket);
315 
316         let n: usize = n?;
317 
318         // 将数据写入用户空间的iovecs
319         iovs.scatter(&buf[..n]);
320 
321         let sockaddr_in = SockAddr::from(endpoint);
322         unsafe {
323             sockaddr_in.write_to_user(msg.msg_name, &mut msg.msg_namelen)?;
324         }
325         return Ok(n);
326     }
327 
328     /// @brief sys_listen系统调用的实际执行函数
329     ///
330     /// @param fd 文件描述符
331     /// @param backlog 队列最大连接数
332     ///
333     /// @return 成功返回0,失败返回错误码
334     pub fn listen(fd: usize, backlog: usize) -> Result<usize, SystemError> {
335         let socket: Arc<SocketInode> = ProcessManager::current_pcb()
336             .get_socket(fd as i32)
337             .ok_or(SystemError::EBADF)?;
338         let mut socket = unsafe { socket.inner_no_preempt() };
339         socket.listen(backlog)?;
340         return Ok(0);
341     }
342 
343     /// @brief sys_shutdown系统调用的实际执行函数
344     ///
345     /// @param fd 文件描述符
346     /// @param how 关闭方式
347     ///
348     /// @return 成功返回0,失败返回错误码
349     pub fn shutdown(fd: usize, how: usize) -> Result<usize, SystemError> {
350         let socket: Arc<SocketInode> = ProcessManager::current_pcb()
351             .get_socket(fd as i32)
352             .ok_or(SystemError::EBADF)?;
353         let mut socket = unsafe { socket.inner_no_preempt() };
354         socket.shutdown(ShutdownType::from_bits_truncate(how as u8))?;
355         return Ok(0);
356     }
357 
358     /// @brief sys_accept系统调用的实际执行函数
359     ///
360     /// @param fd 文件描述符
361     /// @param addr SockAddr
362     /// @param addrlen 地址长度
363     ///
364     /// @return 成功返回新的文件描述符,失败返回错误码
365     pub fn accept(fd: usize, addr: *mut SockAddr, addrlen: *mut u32) -> Result<usize, SystemError> {
366         return Self::do_accept(fd, addr, addrlen, 0);
367     }
368 
369     /// sys_accept4 - accept a connection on a socket
370     ///
371     ///
372     /// If flags is 0, then accept4() is the same as accept().  The
373     ///    following values can be bitwise ORed in flags to obtain different
374     ///    behavior:
375     ///
376     /// - SOCK_NONBLOCK
377     ///     Set the O_NONBLOCK file status flag on the open file
378     ///     description (see open(2)) referred to by the new file
379     ///     descriptor.  Using this flag saves extra calls to fcntl(2)
380     ///     to achieve the same result.
381     ///
382     /// - SOCK_CLOEXEC
383     ///     Set the close-on-exec (FD_CLOEXEC) flag on the new file
384     ///     descriptor.  See the description of the O_CLOEXEC flag in
385     ///     open(2) for reasons why this may be useful.
386     pub fn accept4(
387         fd: usize,
388         addr: *mut SockAddr,
389         addrlen: *mut u32,
390         mut flags: u32,
391     ) -> Result<usize, SystemError> {
392         // 如果flags不合法,返回错误
393         if (flags & (!(SOCK_CLOEXEC | SOCK_NONBLOCK)).bits()) != 0 {
394             return Err(SystemError::EINVAL);
395         }
396 
397         if SOCK_NONBLOCK != FileMode::O_NONBLOCK && ((flags & SOCK_NONBLOCK.bits()) != 0) {
398             flags = (flags & !FileMode::O_NONBLOCK.bits()) | FileMode::O_NONBLOCK.bits();
399         }
400 
401         return Self::do_accept(fd, addr, addrlen, flags);
402     }
403 
404     fn do_accept(
405         fd: usize,
406         addr: *mut SockAddr,
407         addrlen: *mut u32,
408         flags: u32,
409     ) -> Result<usize, SystemError> {
410         let socket: Arc<SocketInode> = ProcessManager::current_pcb()
411             .get_socket(fd as i32)
412             .ok_or(SystemError::EBADF)?;
413         // kdebug!("accept: socket={:?}", socket);
414         let mut socket = unsafe { socket.inner_no_preempt() };
415         // 从socket中接收连接
416         let (new_socket, remote_endpoint) = socket.accept()?;
417         drop(socket);
418 
419         // kdebug!("accept: new_socket={:?}", new_socket);
420         // Insert the new socket into the file descriptor vector
421         let new_socket: Arc<SocketInode> = SocketInode::new(new_socket);
422 
423         let mut file_mode = FileMode::O_RDWR;
424         if flags & SOCK_NONBLOCK.bits() != 0 {
425             file_mode |= FileMode::O_NONBLOCK;
426         }
427         if flags & SOCK_CLOEXEC.bits() != 0 {
428             file_mode |= FileMode::O_CLOEXEC;
429         }
430 
431         let new_fd = ProcessManager::current_pcb()
432             .fd_table()
433             .write()
434             .alloc_fd(File::new(new_socket, file_mode)?, None)?;
435         // kdebug!("accept: new_fd={}", new_fd);
436         if !addr.is_null() {
437             // kdebug!("accept: write remote_endpoint to user");
438             // 将对端地址写入用户空间
439             let sockaddr_in = SockAddr::from(remote_endpoint);
440             unsafe {
441                 sockaddr_in.write_to_user(addr, addrlen)?;
442             }
443         }
444         return Ok(new_fd as usize);
445     }
446 
447     /// @brief sys_getsockname系统调用的实际执行函数
448     ///
449     ///  Returns the current address to which the socket
450     ///     sockfd is bound, in the buffer pointed to by addr.
451     ///
452     /// @param fd 文件描述符
453     /// @param addr SockAddr
454     /// @param addrlen 地址长度
455     ///
456     /// @return 成功返回0,失败返回错误码
457     pub fn getsockname(
458         fd: usize,
459         addr: *mut SockAddr,
460         addrlen: *mut u32,
461     ) -> Result<usize, SystemError> {
462         if addr.is_null() {
463             return Err(SystemError::EINVAL);
464         }
465         let socket: Arc<SocketInode> = ProcessManager::current_pcb()
466             .get_socket(fd as i32)
467             .ok_or(SystemError::EBADF)?;
468         let socket = socket.inner();
469         let endpoint: Endpoint = socket.endpoint().ok_or(SystemError::EINVAL)?;
470         drop(socket);
471 
472         let sockaddr_in = SockAddr::from(endpoint);
473         unsafe {
474             sockaddr_in.write_to_user(addr, addrlen)?;
475         }
476         return Ok(0);
477     }
478 
479     /// @brief sys_getpeername系统调用的实际执行函数
480     ///
481     /// @param fd 文件描述符
482     /// @param addr SockAddr
483     /// @param addrlen 地址长度
484     ///
485     /// @return 成功返回0,失败返回错误码
486     pub fn getpeername(
487         fd: usize,
488         addr: *mut SockAddr,
489         addrlen: *mut u32,
490     ) -> Result<usize, SystemError> {
491         if addr.is_null() {
492             return Err(SystemError::EINVAL);
493         }
494 
495         let socket: Arc<SocketInode> = ProcessManager::current_pcb()
496             .get_socket(fd as i32)
497             .ok_or(SystemError::EBADF)?;
498         let socket = socket.inner();
499         let endpoint: Endpoint = socket.peer_endpoint().ok_or(SystemError::EINVAL)?;
500         drop(socket);
501 
502         let sockaddr_in = SockAddr::from(endpoint);
503         unsafe {
504             sockaddr_in.write_to_user(addr, addrlen)?;
505         }
506         return Ok(0);
507     }
508 }
509 
// Reference: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/netinet_in.h.html#tag_13_32
/// IPv4 socket address as exchanged with user space (C layout).
///
/// The conversions in this file treat `sin_port` and `sin_addr` as big-endian
/// (network byte order) values.
#[repr(C)]
#[derive(Debug, Clone, Copy)]
pub struct SockAddrIn {
    /// Address family tag.
    pub sin_family: u16,
    /// Port number, big-endian.
    pub sin_port: u16,
    /// IPv4 address, big-endian.
    pub sin_addr: u32,
    /// Padding; written as zeros by the conversions in this file.
    pub sin_zero: [u8; 8],
}
519 
/// Unix-domain socket address (C layout): a family tag followed by a 108-byte path buffer.
#[repr(C)]
#[derive(Debug, Clone, Copy)]
pub struct SockAddrUn {
    /// Address family tag.
    pub sun_family: u16,
    /// Path bytes. NOTE(review): presumably NUL-terminated as in C — confirm against users.
    pub sun_path: [u8; 108],
}
526 
/// Link-layer (packet) socket address (C layout).
///
/// When built from an `Endpoint::LinkLayer` in this file, only `sll_family` and
/// `sll_ifindex` carry information; the remaining fields are zeroed.
#[repr(C)]
#[derive(Debug, Clone, Copy)]
pub struct SockAddrLl {
    /// Address family tag.
    pub sll_family: u16,
    /// Protocol field.
    pub sll_protocol: u16,
    /// Interface index.
    pub sll_ifindex: u32,
    /// Hardware address type.
    pub sll_hatype: u16,
    /// Packet type.
    pub sll_pkttype: u8,
    /// Length of the hardware address stored in `sll_addr`.
    pub sll_halen: u8,
    /// Hardware address bytes.
    pub sll_addr: [u8; 8],
}
538 
/// Netlink socket address (C layout).
///
/// The fields are public, like those of the other socket address structs in this
/// file, so that code outside this module can build and inspect netlink addresses.
#[repr(C)]
#[derive(Debug, Clone, Copy)]
pub struct SockAddrNl {
    /// Address family tag.
    pub nl_family: u16,
    /// Padding.
    pub nl_pad: u16,
    /// Port id field.
    pub nl_pid: u32,
    /// Multicast groups mask field.
    pub nl_groups: u32,
}
547 
/// Generic 16-byte socket address (C layout): a family tag plus 14 opaque bytes.
///
/// Used in this file to represent an unspecified address.
#[repr(C)]
#[derive(Debug, Clone, Copy)]
pub struct SockAddrPlaceholder {
    /// Address family tag.
    pub family: u16,
    /// Opaque address bytes.
    pub data: [u8; 14],
}
554 
/// Socket address as exchanged with user space.
///
/// A C-layout union of all supported address formats. Every variant starts with a
/// 16-bit family tag, so `family` can be read to find out which variant is valid.
#[repr(C)]
#[derive(Clone, Copy)]
pub union SockAddr {
    /// Family tag shared by all variants.
    pub family: u16,
    /// IPv4 address.
    pub addr_in: SockAddrIn,
    /// Unix-domain address.
    pub addr_un: SockAddrUn,
    /// Link-layer (packet) address.
    pub addr_ll: SockAddrLl,
    /// Netlink address.
    pub addr_nl: SockAddrNl,
    /// Generic placeholder address.
    pub addr_ph: SockAddrPlaceholder,
}
565 
566 impl SockAddr {
567     /// @brief 把用户传入的SockAddr转换为Endpoint结构体
568     pub fn to_endpoint(addr: *const SockAddr, len: usize) -> Result<Endpoint, SystemError> {
569         verify_area(
570             VirtAddr::new(addr as usize),
571             core::mem::size_of::<SockAddr>(),
572         )
573         .map_err(|_| SystemError::EFAULT)?;
574 
575         let addr = unsafe { addr.as_ref() }.ok_or(SystemError::EFAULT)?;
576         if len < addr.len()? {
577             return Err(SystemError::EINVAL);
578         }
579         unsafe {
580             match AddressFamily::try_from(addr.family)? {
581                 AddressFamily::INet => {
582                     let addr_in: SockAddrIn = addr.addr_in;
583 
584                     let ip: wire::IpAddress = wire::IpAddress::from(wire::Ipv4Address::from_bytes(
585                         &u32::from_be(addr_in.sin_addr).to_be_bytes()[..],
586                     ));
587                     let port = u16::from_be(addr_in.sin_port);
588 
589                     return Ok(Endpoint::Ip(Some(wire::IpEndpoint::new(ip, port))));
590                 }
591                 AddressFamily::Packet => {
592                     // TODO: support packet socket
593                     return Err(SystemError::EINVAL);
594                 }
595                 AddressFamily::Netlink => {
596                     // TODO: support netlink socket
597                     return Err(SystemError::EINVAL);
598                 }
599                 AddressFamily::Unix => {
600                     return Err(SystemError::EINVAL);
601                 }
602                 _ => {
603                     return Err(SystemError::EINVAL);
604                 }
605             }
606         }
607     }
608 
609     /// @brief 获取地址长度
610     pub fn len(&self) -> Result<usize, SystemError> {
611         let ret = match AddressFamily::try_from(unsafe { self.family })? {
612             AddressFamily::INet => Ok(core::mem::size_of::<SockAddrIn>()),
613             AddressFamily::Packet => Ok(core::mem::size_of::<SockAddrLl>()),
614             AddressFamily::Netlink => Ok(core::mem::size_of::<SockAddrNl>()),
615             AddressFamily::Unix => Err(SystemError::EINVAL),
616             _ => Err(SystemError::EINVAL),
617         };
618 
619         return ret;
620     }
621 
622     /// @brief 把SockAddr的数据写入用户空间
623     ///
624     /// @param addr 用户空间的SockAddr的地址
625     /// @param len 要写入的长度
626     ///
627     /// @return 成功返回写入的长度,失败返回错误码
628     pub unsafe fn write_to_user(
629         &self,
630         addr: *mut SockAddr,
631         addr_len: *mut u32,
632     ) -> Result<usize, SystemError> {
633         // 当用户传入的地址或者长度为空时,直接返回0
634         if addr.is_null() || addr_len.is_null() {
635             return Ok(0);
636         }
637 
638         // 检查用户传入的地址是否合法
639         verify_area(
640             VirtAddr::new(addr as usize),
641             core::mem::size_of::<SockAddr>(),
642         )
643         .map_err(|_| SystemError::EFAULT)?;
644 
645         verify_area(
646             VirtAddr::new(addr_len as usize),
647             core::mem::size_of::<u32>(),
648         )
649         .map_err(|_| SystemError::EFAULT)?;
650 
651         let to_write = min(self.len()?, *addr_len as usize);
652         if to_write > 0 {
653             let buf = core::slice::from_raw_parts_mut(addr as *mut u8, to_write);
654             buf.copy_from_slice(core::slice::from_raw_parts(
655                 self as *const SockAddr as *const u8,
656                 to_write,
657             ));
658         }
659         *addr_len = self.len()? as u32;
660         return Ok(to_write);
661     }
662 }
663 
664 impl From<Endpoint> for SockAddr {
665     fn from(value: Endpoint) -> Self {
666         match value {
667             Endpoint::Ip(ip_endpoint) => {
668                 // 未指定地址
669                 if let None = ip_endpoint {
670                     return SockAddr {
671                         addr_ph: SockAddrPlaceholder {
672                             family: AddressFamily::Unspecified as u16,
673                             data: [0; 14],
674                         },
675                     };
676                 }
677                 // 指定了地址
678                 let ip_endpoint = ip_endpoint.unwrap();
679                 match ip_endpoint.addr {
680                     wire::IpAddress::Ipv4(ipv4_addr) => {
681                         let addr_in = SockAddrIn {
682                             sin_family: AddressFamily::INet as u16,
683                             sin_port: ip_endpoint.port.to_be(),
684                             sin_addr: u32::from_be_bytes(ipv4_addr.0).to_be(),
685                             sin_zero: [0; 8],
686                         };
687 
688                         return SockAddr { addr_in };
689                     }
690                     _ => {
691                         unimplemented!("not support ipv6");
692                     }
693                 }
694             }
695 
696             Endpoint::LinkLayer(link_endpoint) => {
697                 let addr_ll = SockAddrLl {
698                     sll_family: AddressFamily::Packet as u16,
699                     sll_protocol: 0,
700                     sll_ifindex: link_endpoint.interface as u32,
701                     sll_hatype: 0,
702                     sll_pkttype: 0,
703                     sll_halen: 0,
704                     sll_addr: [0; 8],
705                 };
706 
707                 return SockAddr { addr_ll };
708             }
709             _ => {
710                 // todo: support other endpoint, like Netlink...
711                 unimplemented!("not support {value:?}");
712             }
713         }
714     }
715 }
716 
/// Message header passed to `recvmsg` (C layout).
#[repr(C)]
#[derive(Debug, Clone, Copy)]
pub struct MsgHdr {
    /// Pointer to a `SockAddr`
    pub msg_name: *mut SockAddr,
    /// Size of the `SockAddr`
    pub msg_namelen: u32,
    /// scatter/gather array
    pub msg_iov: *mut IoVec,
    /// elements in msg_iov
    pub msg_iovlen: usize,
    /// Ancillary data
    pub msg_control: *mut u8,
    /// Length of the ancillary data
    pub msg_controllen: usize,
    /// Flags on the received message
    pub msg_flags: u32,
}
735 
/// IP protocol numbers.
///
/// In this file the enum is used to decode the `level` argument of `getsockopt`
/// when it is not `SOL_SOCKET`.
#[derive(Debug, Clone, Copy, FromPrimitive, ToPrimitive, PartialEq, Eq)]
pub enum PosixIpProtocol {
    /// Dummy protocol for TCP.
    IP = 0,
    /// Internet Control Message Protocol.
    ICMP = 1,
    /// Internet Group Management Protocol.
    IGMP = 2,
    /// IPIP tunnels (older KA9Q tunnels use 94).
    IPIP = 4,
    /// Transmission Control Protocol.
    TCP = 6,
    /// Exterior Gateway Protocol.
    EGP = 8,
    /// PUP protocol.
    PUP = 12,
    /// User Datagram Protocol.
    UDP = 17,
    /// XNS IDP protocol.
    IDP = 22,
    /// SO Transport Protocol Class 4.
    TP = 29,
    /// Datagram Congestion Control Protocol.
    DCCP = 33,
    /// IPv6-in-IPv4 tunnelling.
    IPv6 = 41,
    /// RSVP Protocol.
    RSVP = 46,
    /// Generic Routing Encapsulation. (Cisco GRE) (rfc 1701, 1702)
    GRE = 47,
    /// Encapsulation Security Payload protocol
    ESP = 50,
    /// Authentication Header protocol
    AH = 51,
    /// Multicast Transport Protocol.
    MTP = 92,
    /// IP option pseudo header for BEET
    BEETPH = 94,
    /// Encapsulation Header.
    ENCAP = 98,
    /// Protocol Independent Multicast.
    PIM = 103,
    /// Compression Header Protocol.
    COMP = 108,
    /// Stream Control Transport Protocol
    SCTP = 132,
    /// UDP-Lite protocol (RFC 3828)
    UDPLITE = 136,
    /// MPLS in IP (RFC 4023)
    MPLSINIP = 137,
    /// Ethernet-within-IPv6 Encapsulation
    ETHERNET = 143,
    /// Raw IP packets
    RAW = 255,
    /// Multipath TCP connection
    MPTCP = 262,
}
793 
794 impl TryFrom<u16> for PosixIpProtocol {
795     type Error = SystemError;
796 
797     fn try_from(value: u16) -> Result<Self, Self::Error> {
798         match <Self as FromPrimitive>::from_u16(value) {
799             Some(p) => Ok(p),
800             None => Err(SystemError::EPROTONOSUPPORT),
801         }
802     }
803 }
804 
805 impl Into<u16> for PosixIpProtocol {
806     fn into(self) -> u16 {
807         <Self as ToPrimitive>::to_u16(&self).unwrap()
808     }
809 }
810 
/// Socket option names.
///
/// In this file the enum is used to decode `optname` of `getsockopt` at the
/// `SOL_SOCKET` level.
/// NOTE(review): the numeric values look like those of Linux's generic socket option
/// header — confirm before relying on it.
#[allow(non_camel_case_types)]
#[derive(Debug, Clone, Copy, FromPrimitive, ToPrimitive, PartialEq, Eq)]
pub enum PosixSocketOption {
    SO_DEBUG = 1,
    SO_REUSEADDR = 2,
    SO_TYPE = 3,
    SO_ERROR = 4,
    SO_DONTROUTE = 5,
    SO_BROADCAST = 6,
    SO_SNDBUF = 7,
    SO_RCVBUF = 8,
    SO_SNDBUFFORCE = 32,
    SO_RCVBUFFORCE = 33,
    SO_KEEPALIVE = 9,
    SO_OOBINLINE = 10,
    SO_NO_CHECK = 11,
    SO_PRIORITY = 12,
    SO_LINGER = 13,
    SO_BSDCOMPAT = 14,
    SO_REUSEPORT = 15,
    SO_PASSCRED = 16,
    SO_PEERCRED = 17,
    SO_RCVLOWAT = 18,
    SO_SNDLOWAT = 19,
    SO_RCVTIMEO_OLD = 20,
    SO_SNDTIMEO_OLD = 21,

    SO_SECURITY_AUTHENTICATION = 22,
    SO_SECURITY_ENCRYPTION_TRANSPORT = 23,
    SO_SECURITY_ENCRYPTION_NETWORK = 24,

    SO_BINDTODEVICE = 25,

    /// Same as SO_GET_FILTER
    SO_ATTACH_FILTER = 26,
    SO_DETACH_FILTER = 27,

    SO_PEERNAME = 28,

    SO_ACCEPTCONN = 30,

    SO_PEERSEC = 31,
    SO_PASSSEC = 34,

    SO_MARK = 36,

    SO_PROTOCOL = 38,
    SO_DOMAIN = 39,

    SO_RXQ_OVFL = 40,

    /// Same as SCM_WIFI_STATUS
    SO_WIFI_STATUS = 41,
    SO_PEEK_OFF = 42,

    /* Instruct lower device to use last 4-bytes of skb data as FCS */
    SO_NOFCS = 43,

    SO_LOCK_FILTER = 44,
    SO_SELECT_ERR_QUEUE = 45,
    SO_BUSY_POLL = 46,
    SO_MAX_PACING_RATE = 47,
    SO_BPF_EXTENSIONS = 48,
    SO_INCOMING_CPU = 49,
    SO_ATTACH_BPF = 50,
    // SO_DETACH_BPF = SO_DETACH_FILTER,
    SO_ATTACH_REUSEPORT_CBPF = 51,
    SO_ATTACH_REUSEPORT_EBPF = 52,

    SO_CNX_ADVICE = 53,
    SCM_TIMESTAMPING_OPT_STATS = 54,
    SO_MEMINFO = 55,
    SO_INCOMING_NAPI_ID = 56,
    SO_COOKIE = 57,
    SCM_TIMESTAMPING_PKTINFO = 58,
    SO_PEERGROUPS = 59,
    SO_ZEROCOPY = 60,
    /// Same as SCM_TXTIME
    SO_TXTIME = 61,

    SO_BINDTOIFINDEX = 62,

    SO_TIMESTAMP_OLD = 29,
    SO_TIMESTAMPNS_OLD = 35,
    SO_TIMESTAMPING_OLD = 37,
    SO_TIMESTAMP_NEW = 63,
    SO_TIMESTAMPNS_NEW = 64,
    SO_TIMESTAMPING_NEW = 65,

    SO_RCVTIMEO_NEW = 66,
    SO_SNDTIMEO_NEW = 67,

    SO_DETACH_REUSEPORT_BPF = 68,

    SO_PREFER_BUSY_POLL = 69,
    SO_BUSY_POLL_BUDGET = 70,

    SO_NETNS_COOKIE = 71,
    SO_BUF_LOCK = 72,
    SO_RESERVE_MEM = 73,
    SO_TXREHASH = 74,
    SO_RCVMARK = 75,
}
914 
915 impl TryFrom<i32> for PosixSocketOption {
916     type Error = SystemError;
917 
918     fn try_from(value: i32) -> Result<Self, Self::Error> {
919         match <Self as FromPrimitive>::from_i32(value) {
920             Some(p) => Ok(p),
921             None => Err(SystemError::EINVAL),
922         }
923     }
924 }
925 
926 impl Into<i32> for PosixSocketOption {
927     fn into(self) -> i32 {
928         <Self as ToPrimitive>::to_i32(&self).unwrap()
929     }
930 }
931 
/// TCP-level socket option names.
///
/// In this file the enum is used to decode `optname` of `getsockopt` when the level
/// is the TCP protocol number.
#[derive(Debug, Clone, Copy, PartialEq, Eq, FromPrimitive, ToPrimitive)]
pub enum PosixTcpSocketOptions {
    /// Turn off Nagle's algorithm.
    NoDelay = 1,
    /// Limit MSS.
    MaxSegment = 2,
    /// Never send partially complete segments.
    Cork = 3,
    /// Start keepalives after this period.
    KeepIdle = 4,
    /// Interval between keepalives.
    KeepIntvl = 5,
    /// Number of keepalives before death.
    KeepCnt = 6,
    /// Number of SYN retransmits.
    Syncnt = 7,
    /// Lifetime for orphaned FIN-WAIT-2 state.
    Linger2 = 8,
    /// Wake up listener only when data arrive.
    DeferAccept = 9,
    /// Bound advertised window
    WindowClamp = 10,
    /// Information about this connection.
    Info = 11,
    /// Block/reenable quick acks.
    QuickAck = 12,
    /// Congestion control algorithm.
    Congestion = 13,
    /// TCP MD5 Signature (RFC2385).
    Md5Sig = 14,
    /// Use linear timeouts for thin streams
    ThinLinearTimeouts = 16,
    /// Fast retrans. after 1 dupack.
    ThinDupack = 17,
    /// How long for loss retry before timeout.
    UserTimeout = 18,
    /// TCP sock is under repair right now.
    Repair = 19,
    RepairQueue = 20,
    QueueSeq = 21,
    RepairOptions = 22,
    /// Enable FastOpen on listeners
    FastOpen = 23,
    Timestamp = 24,
    /// Limit number of unsent bytes in write queue.
    NotSentLowat = 25,
    /// Get Congestion Control (optional) info.
    CCInfo = 26,
    /// Record SYN headers for new connections.
    SaveSyn = 27,
    /// Get SYN headers recorded for connection.
    SavedSyn = 28,
    /// Get/set window parameters.
    RepairWindow = 29,
    /// Attempt FastOpen with connect.
    FastOpenConnect = 30,
    /// Attach a ULP to a TCP connection.
    ULP = 31,
    /// TCP MD5 Signature with extensions.
    Md5SigExt = 32,
    /// Set the key for Fast Open(cookie).
    FastOpenKey = 33,
    /// Enable TFO without a TFO cookie.
    FastOpenNoCookie = 34,
    ZeroCopyReceive = 35,
    /// Notify bytes available to read as a cmsg on read.
    /// Same as TCP_CM_INQ
    INQ = 36,
    /// delay outgoing packets by XX usec
    TxDelay = 37,
}
1003 
1004 impl TryFrom<i32> for PosixTcpSocketOptions {
1005     type Error = SystemError;
1006 
1007     fn try_from(value: i32) -> Result<Self, Self::Error> {
1008         match <Self as FromPrimitive>::from_i32(value) {
1009             Some(p) => Ok(p),
1010             None => Err(SystemError::EINVAL),
1011         }
1012     }
1013 }
1014 
1015 impl Into<i32> for PosixTcpSocketOptions {
1016     fn into(self) -> i32 {
1017         <Self as ToPrimitive>::to_i32(&self).unwrap()
1018     }
1019 }
1020