xref: /DragonOS/kernel/src/net/socket/mod.rs (revision 634349e0ebfca487e6aa2761a796f04895908718)
use core::{any::Any, fmt::Debug, sync::atomic::AtomicUsize};

use alloc::{
    boxed::Box,
    collections::LinkedList,
    string::String,
    sync::{Arc, Weak},
    vec::Vec,
};
use hashbrown::HashMap;
use log::warn;
use smoltcp::{
    iface::SocketSet,
    socket::{self, raw, tcp, udp},
};
use system_error::SystemError;

use crate::{
    arch::rand::rand,
    filesystem::vfs::{
        file::FileMode, syscall::ModeType, FilePrivateData, FileSystem, FileType, IndexNode,
        Metadata,
    },
    libs::{
        rwlock::{RwLock, RwLockWriteGuard},
        spinlock::{SpinLock, SpinLockGuard},
        wait_queue::EventWaitQueue,
    },
    process::{Pid, ProcessManager},
    sched::{schedule, SchedMode},
};

use self::{
    handle::GlobalSocketHandle,
    inet::{RawSocket, TcpSocket, UdpSocket},
    unix::{SeqpacketSocket, StreamSocket},
};

use super::{
    event_poll::{EPollEventType, EPollItem, EventPoll},
    Endpoint, Protocol, ShutdownType,
};

pub mod handle;
pub mod inet;
pub mod unix;

lazy_static! {
    /// The set of all sockets.
    /// TODO: Optimize this by implementing our own SocketSet!!! As it stands, no matter how many
    /// NICs the system has, only one process can access a socket at any point in time.
    pub static ref SOCKET_SET: SpinLock<SocketSet<'static >> = SpinLock::new(SocketSet::new(vec![]));
    /// SocketHandle table: every SocketHandle maps to one SocketHandleItem.
    /// Note! The NIC interrupt handler needs to take this table's lock, so interrupts must be
    /// disabled while acquiring the read lock to avoid deadlock.
    pub static ref HANDLE_MAP: RwLock<HashMap<GlobalSocketHandle, SocketHandleItem>> = RwLock::new(HashMap::new());
    /// Port manager.
    pub static ref PORT_MANAGER: PortManager = PortManager::new();
}
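
// Illustrative sketch (an assumption, not code from this module): because the NIC interrupt
// handler also locks HANDLE_MAP, the process-context code in this file uses the *_irqsave
// variants when touching the table (see new_socket() and SocketInode::do_close()). A lookup
// would follow the same discipline; `handle` is a placeholder for a GlobalSocketHandle the
// caller already holds.
//
//     let guard = HANDLE_MAP.write_irqsave();
//     if let Some(item) = guard.get(&handle) {
//         let shutdown = item.shutdown_type();
//         // ... use `shutdown` while the guard is held ...
//     }
//     drop(guard);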

/* For setsockopt(2) */
// See: linux-5.19.10/include/uapi/asm-generic/socket.h#9
pub const SOL_SOCKET: u8 = 1;

/// Create a socket according to the address family, socket type and protocol.
pub(super) fn new_socket(
    address_family: AddressFamily,
    socket_type: PosixSocketType,
    protocol: Protocol,
) -> Result<Box<dyn Socket>, SystemError> {
    let socket: Box<dyn Socket> = match address_family {
        AddressFamily::Unix => match socket_type {
            PosixSocketType::Stream => Box::new(StreamSocket::new(SocketOptions::default())),
            PosixSocketType::SeqPacket => Box::new(SeqpacketSocket::new(SocketOptions::default())),
            _ => {
                return Err(SystemError::EINVAL);
            }
        },
        AddressFamily::INet => match socket_type {
            PosixSocketType::Stream => Box::new(TcpSocket::new(SocketOptions::default())),
            PosixSocketType::Datagram => Box::new(UdpSocket::new(SocketOptions::default())),
            PosixSocketType::Raw => Box::new(RawSocket::new(protocol, SocketOptions::default())),
            _ => {
                return Err(SystemError::EINVAL);
            }
        },
        _ => {
            return Err(SystemError::EAFNOSUPPORT);
        }
    };

    let handle_item = SocketHandleItem::new(Arc::downgrade(&socket.posix_item()));
    HANDLE_MAP
        .write_irqsave()
        .insert(socket.socket_handle(), handle_item);
    Ok(socket)
}
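
// Illustrative sketch (an assumption, not code from this module): a syscall-layer caller would
// typically create the boxed socket here and then wrap it in a SocketInode so it can be attached
// to a file descriptor. The `protocol` value is whatever the caller translated from the syscall
// arguments; fd plumbing and error handling are omitted.
//
//     let socket = new_socket(AddressFamily::INet, PosixSocketType::Stream, protocol)?;
//     let inode = SocketInode::new(socket);
//     // `inode` can now be opened and installed into the process's file table.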

pub trait Socket: Sync + Send + Debug + Any {
    /// @brief Read data from the socket. If the socket is blocking, this does not return until
    /// data has been read.
    ///
    /// @param buf Buffer that receives the data read.
    ///
    /// @return - On success: (number of bytes read, endpoint the data was read from).
    ///         - On failure: error code.
    fn read(&self, buf: &mut [u8]) -> (Result<usize, SystemError>, Endpoint);

    /// @brief Write data to the socket. If the socket is blocking, this does not return until all
    /// of the data has been written.
    ///
    /// @param buf Data to write.
    /// @param to Destination endpoint. If None, the written data will be dropped.
    ///
    /// @return Number of bytes written.
    fn write(&self, buf: &[u8], to: Option<Endpoint>) -> Result<usize, SystemError>;

    /// @brief Corresponds to POSIX connect(): connect to the given remote server endpoint.
    ///
    /// It is used to establish a connection to a remote server.
    /// When a socket is connected to a remote server,
    /// the operating system will establish a network connection with the server
    /// and allow data to be sent and received between the local socket and the remote server.
    ///
    /// @param endpoint Endpoint to connect to.
    ///
    /// @return Whether the connection succeeded.
    fn connect(&mut self, _endpoint: Endpoint) -> Result<(), SystemError>;

    /// @brief Corresponds to POSIX bind(): bind to the given local endpoint.
    ///
    /// The bind() function is used to associate a socket with a particular IP address and port number on the local machine.
    ///
    /// @param endpoint Endpoint to bind to.
    ///
    /// @return Whether the bind succeeded.
    fn bind(&mut self, _endpoint: Endpoint) -> Result<(), SystemError> {
        Err(SystemError::ENOSYS)
    }

    /// @brief Corresponds to POSIX shutdown(): shut the socket down.
    ///
    /// shutdown() initiates an orderly shutdown of a network connection.
    /// Once a connection is established between two endpoints, either endpoint can start the
    /// shutdown sequence by calling shutdown() on its endpoint object.
    /// This sends a shutdown message to the remote endpoint to indicate that the local endpoint
    /// no longer accepts new data.
    ///
    /// @return Whether the shutdown succeeded.
    fn shutdown(&mut self, _type: ShutdownType) -> Result<(), SystemError> {
        Err(SystemError::ENOSYS)
    }

    /// @brief Corresponds to POSIX listen(): listen on the endpoint.
    ///
    /// @param backlog Maximum number of pending connections.
    ///
    /// @return Whether the listen succeeded.
    fn listen(&mut self, _backlog: usize) -> Result<(), SystemError> {
        Err(SystemError::ENOSYS)
    }

    /// @brief Corresponds to POSIX accept(): accept a connection.
    ///
    /// @param endpoint Endpoint of the peer.
    ///
    /// @return Whether accepting the connection succeeded.
    fn accept(&mut self) -> Result<(Box<dyn Socket>, Endpoint), SystemError> {
        Err(SystemError::ENOSYS)
    }

    /// @brief Get the socket's local endpoint.
    ///
    /// @return The socket's endpoint.
    fn endpoint(&self) -> Option<Endpoint> {
        None
    }

    /// @brief Get the socket's peer endpoint.
    ///
    /// @return The socket's peer endpoint.
    fn peer_endpoint(&self) -> Option<Endpoint> {
        None
    }

    /// @brief
    ///     The purpose of the poll function is to provide
    ///     a non-blocking way to check if a socket is ready for reading or writing,
    ///     so that you can efficiently handle multiple sockets in a single thread or event loop.
    ///
    /// @return (in, out, err)
    ///
    ///     The first boolean value indicates whether the socket is ready for reading. If it is true, then there is data available to be read from the socket without blocking.
    ///     The second boolean value indicates whether the socket is ready for writing. If it is true, then data can be written to the socket without blocking.
    ///     The third boolean value indicates whether the socket has encountered an error condition. If it is true, then the socket is in an error state and should be closed or reset.
    ///
    fn poll(&self) -> EPollEventType {
        EPollEventType::empty()
    }

    /// @brief The socket's ioctl handler.
    ///
    /// @param cmd The ioctl command.
    /// @param arg0 First argument of the ioctl command.
    /// @param arg1 Second argument of the ioctl command.
    /// @param arg2 Third argument of the ioctl command.
    ///
    /// @return Return value of the ioctl command.
    fn ioctl(
        &self,
        _cmd: usize,
        _arg0: usize,
        _arg1: usize,
        _arg2: usize,
    ) -> Result<usize, SystemError> {
        Ok(0)
    }

    /// @brief Get the socket's metadata.
    fn metadata(&self) -> SocketMetadata;

    fn box_clone(&self) -> Box<dyn Socket>;

    /// @brief Set a socket option.
    ///
    /// @param level Level of the option.
    /// @param optname Name of the option.
    /// @param optval Value of the option.
    ///
    /// @return Whether setting the option succeeded; returns ENOSYS if the option is not supported.
    fn setsockopt(
        &self,
        _level: usize,
        _optname: usize,
        _optval: &[u8],
    ) -> Result<(), SystemError> {
        warn!("setsockopt is not implemented");
        Ok(())
    }

    fn socket_handle(&self) -> GlobalSocketHandle;

    fn write_buffer(&self, _buf: &[u8]) -> Result<usize, SystemError> {
        todo!()
    }

    fn as_any_ref(&self) -> &dyn Any;

    fn as_any_mut(&mut self) -> &mut dyn Any;

    fn add_epoll(&mut self, epitem: Arc<EPollItem>) -> Result<(), SystemError> {
        let posix_item = self.posix_item();
        posix_item.add_epoll(epitem);
        Ok(())
    }

    fn remove_epoll(&mut self, epoll: &Weak<SpinLock<EventPoll>>) -> Result<(), SystemError> {
        let posix_item = self.posix_item();
        posix_item.remove_epoll(epoll)?;

        Ok(())
    }

    fn clear_epoll(&mut self) -> Result<(), SystemError> {
        let posix_item = self.posix_item();

        for epitem in posix_item.epitems.lock_irqsave().iter() {
            let epoll = epitem.epoll();

            if let Some(epoll) = epoll.upgrade() {
                EventPoll::ep_remove(&mut epoll.lock_irqsave(), epitem.fd(), None)?;
            }
        }

        Ok(())
    }

    fn close(&mut self);

    fn posix_item(&self) -> Arc<PosixSocketHandleItem>;
}

impl Clone for Box<dyn Socket> {
    fn clone(&self) -> Box<dyn Socket> {
        self.box_clone()
    }
}

/// # The inode wrapper that represents a socket in the filesystem
#[derive(Debug)]
pub struct SocketInode(SpinLock<Box<dyn Socket>>, AtomicUsize);

impl SocketInode {
    pub fn new(socket: Box<dyn Socket>) -> Arc<Self> {
        Arc::new(Self(SpinLock::new(socket), AtomicUsize::new(0)))
    }

    #[inline]
    pub fn inner(&self) -> SpinLockGuard<Box<dyn Socket>> {
        self.0.lock()
    }

    pub unsafe fn inner_no_preempt(&self) -> SpinLockGuard<Box<dyn Socket>> {
        self.0.lock_no_preempt()
    }

    fn do_close(&self) -> Result<(), SystemError> {
        let prev_ref_count = self.1.fetch_sub(1, core::sync::atomic::Ordering::SeqCst);
        if prev_ref_count == 1 {
            // This is the last close, so release the resources.
            let mut socket = self.0.lock_irqsave();

            if socket.metadata().socket_type == SocketType::Unix {
                return Ok(());
            }

            if let Some(Endpoint::Ip(Some(ip))) = socket.endpoint() {
                PORT_MANAGER.unbind_port(socket.metadata().socket_type, ip.port);
            }

            socket.clear_epoll()?;

            HANDLE_MAP
                .write_irqsave()
                .remove(&socket.socket_handle())
                .unwrap();
            socket.close();
        }

        Ok(())
    }
}

impl Drop for SocketInode {
    fn drop(&mut self) {
        for _ in 0..self.1.load(core::sync::atomic::Ordering::SeqCst) {
            let _ = self.do_close();
        }
    }
}

impl IndexNode for SocketInode {
    fn open(
        &self,
        _data: SpinLockGuard<FilePrivateData>,
        _mode: &FileMode,
    ) -> Result<(), SystemError> {
        self.1.fetch_add(1, core::sync::atomic::Ordering::SeqCst);
        Ok(())
    }

    fn close(&self, _data: SpinLockGuard<FilePrivateData>) -> Result<(), SystemError> {
        self.do_close()
    }

    fn read_at(
        &self,
        _offset: usize,
        len: usize,
        buf: &mut [u8],
        data: SpinLockGuard<FilePrivateData>,
    ) -> Result<usize, SystemError> {
        drop(data);
        self.0.lock_no_preempt().read(&mut buf[0..len]).0
    }

    fn write_at(
        &self,
        _offset: usize,
        len: usize,
        buf: &[u8],
        data: SpinLockGuard<FilePrivateData>,
    ) -> Result<usize, SystemError> {
        drop(data);
        self.0.lock_no_preempt().write(&buf[0..len], None)
    }

    fn poll(&self, _private_data: &FilePrivateData) -> Result<usize, SystemError> {
        let events = self.0.lock_irqsave().poll();
        return Ok(events.bits() as usize);
    }

    fn fs(&self) -> Arc<dyn FileSystem> {
        todo!()
    }

    fn as_any_ref(&self) -> &dyn Any {
        self
    }

    fn list(&self) -> Result<Vec<String>, SystemError> {
        return Err(SystemError::ENOTDIR);
    }

    fn metadata(&self) -> Result<Metadata, SystemError> {
        let meta = Metadata {
            mode: ModeType::from_bits_truncate(0o755),
            file_type: FileType::Socket,
            ..Default::default()
        };

        return Ok(meta);
    }

    fn resize(&self, _len: usize) -> Result<(), SystemError> {
        return Ok(());
    }
}

#[derive(Debug)]
pub struct PosixSocketHandleItem {
    /// The socket's wait queue.
    wait_queue: Arc<EventWaitQueue>,

    pub epitems: SpinLock<LinkedList<Arc<EPollItem>>>,
}

impl PosixSocketHandleItem {
    pub fn new(wait_queue: Option<Arc<EventWaitQueue>>) -> Self {
        Self {
            wait_queue: wait_queue.unwrap_or(Arc::new(EventWaitQueue::new())),
            epitems: SpinLock::new(LinkedList::new()),
        }
    }
    /// ## Sleep on the socket's wait queue.
    pub fn sleep(&self, events: u64) {
        unsafe {
            ProcessManager::preempt_disable();
            self.wait_queue.sleep_without_schedule(events);
            ProcessManager::preempt_enable();
        }
        schedule(SchedMode::SM_NONE);
    }

    pub fn add_epoll(&self, epitem: Arc<EPollItem>) {
        self.epitems.lock_irqsave().push_back(epitem)
    }

    pub fn remove_epoll(&self, epoll: &Weak<SpinLock<EventPoll>>) -> Result<(), SystemError> {
        let is_remove = !self
            .epitems
            .lock_irqsave()
            .extract_if(|x| x.epoll().ptr_eq(epoll))
            .collect::<Vec<_>>()
            .is_empty();

        if is_remove {
            return Ok(());
        }

        Err(SystemError::ENOENT)
    }

    /// ### Wake up the processes on this queue that are waiting for `events`.
    ///
    ///  ### Parameters
    /// - events: the events that occurred
    ///
    /// Note that a process is woken up as soon as any one of the events in `events` fires.
    pub fn wakeup_any(&self, events: u64) {
        self.wait_queue.wakeup_any(events);
    }
}
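
// Illustrative sketch (an assumption, not code from this module): a blocking operation on a
// concrete socket would typically park itself on the PosixSocketHandleItem and rely on the
// packet-processing path calling wakeup_any() once the awaited events arrive. `ready_to_read`
// and the event mask are placeholders.
//
//     let posix_item = socket.posix_item();
//     while !ready_to_read() {
//         // Sleep until any of the awaited epoll events is signalled on this wait queue.
//         posix_item.sleep(EPollEventType::EPOLLIN.bits() as u64);
//     }
//
//     // Elsewhere, the packet-processing path wakes the sleeper:
//     posix_item.wakeup_any(EPollEventType::EPOLLIN.bits() as u64);
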
#[derive(Debug)]
pub struct SocketHandleItem {
    /// Whether the corresponding POSIX socket is in the listen state.
    pub is_posix_listen: bool,
    /// Shutdown state.
    pub shutdown_type: RwLock<ShutdownType>,
    pub posix_item: Weak<PosixSocketHandleItem>,
}

impl SocketHandleItem {
    pub fn new(posix_item: Weak<PosixSocketHandleItem>) -> Self {
        Self {
            is_posix_listen: false,
            shutdown_type: RwLock::new(ShutdownType::empty()),
            posix_item,
        }
    }

    pub fn shutdown_type(&self) -> ShutdownType {
        *self.shutdown_type.read()
    }

    pub fn shutdown_type_writer(&mut self) -> RwLockWriteGuard<ShutdownType> {
        self.shutdown_type.write_irqsave()
    }

    pub fn reset_shutdown_type(&self) {
        *self.shutdown_type.write() = ShutdownType::empty();
    }

    pub fn posix_item(&self) -> Option<Arc<PosixSocketHandleItem>> {
        self.posix_item.upgrade()
    }
}

/// # Port manager for TCP and UDP.
/// If a TCP/UDP socket binds a port, the binding is recorded in the corresponding table so that
/// port conflicts can be detected.
pub struct PortManager {
    // TCP port table
    tcp_port_table: SpinLock<HashMap<u16, Pid>>,
    // UDP port table
    udp_port_table: SpinLock<HashMap<u16, Pid>>,
}

impl PortManager {
    pub fn new() -> Self {
        return Self {
            tcp_port_table: SpinLock::new(HashMap::new()),
            udp_port_table: SpinLock::new(HashMap::new()),
        };
    }

    /// @brief Automatically allocate a port that is unused for the given protocol; if all dynamic
    /// ports are already taken, return EADDRINUSE.
    pub fn get_ephemeral_port(&self, socket_type: SocketType) -> Result<u16, SystemError> {
        // TODO: selects non-conflict high port

        static mut EPHEMERAL_PORT: u16 = 0;
        unsafe {
            if EPHEMERAL_PORT == 0 {
                EPHEMERAL_PORT = (49152 + rand() % (65536 - 49152)) as u16;
            }
        }

        let mut remaining = 65536 - 49152; // Number of allocation attempts left
        let mut port: u16;
        while remaining > 0 {
            unsafe {
                if EPHEMERAL_PORT == 65535 {
                    EPHEMERAL_PORT = 49152;
                } else {
                    EPHEMERAL_PORT += 1;
                }
                port = EPHEMERAL_PORT;
            }

            // Use the listen table to check whether the port is taken.
            let listen_table_guard = match socket_type {
                SocketType::Udp => self.udp_port_table.lock(),
                SocketType::Tcp => self.tcp_port_table.lock(),
                _ => panic!("{:?} can't get a port", socket_type),
            };
            if listen_table_guard.get(&port).is_none() {
                drop(listen_table_guard);
                return Ok(port);
            }
            remaining -= 1;
        }
        return Err(SystemError::EADDRINUSE);
    }

    /// @brief Check whether the given port is already in use; if not, record it in the
    /// corresponding TCP/UDP table.
    ///
    /// TODO: add support for port reuse.
    pub fn bind_port(&self, socket_type: SocketType, port: u16) -> Result<(), SystemError> {
        if port > 0 {
            let mut listen_table_guard = match socket_type {
                SocketType::Udp => self.udp_port_table.lock(),
                SocketType::Tcp => self.tcp_port_table.lock(),
                _ => panic!("{:?} can't bind a port", socket_type),
            };
            match listen_table_guard.get(&port) {
                Some(_) => return Err(SystemError::EADDRINUSE),
                None => listen_table_guard.insert(port, ProcessManager::current_pid()),
            };
            drop(listen_table_guard);
        }
        return Ok(());
    }

    /// @brief Unbind the port from the socket in the corresponding port table.
    /// Should call this function when the socket is closed or aborted.
    pub fn unbind_port(&self, socket_type: SocketType, port: u16) {
        let mut listen_table_guard = match socket_type {
            SocketType::Udp => self.udp_port_table.lock(),
            SocketType::Tcp => self.tcp_port_table.lock(),
            _ => {
                return;
            }
        };
        listen_table_guard.remove(&port);
        drop(listen_table_guard);
    }
}
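
// Illustrative sketch (an assumption, not code from this module): a typical bind path of a
// concrete inet socket would fall back to an ephemeral port when the caller passed port 0, and
// must release the port again on teardown (as SocketInode::do_close() does). `requested_port`
// is a placeholder for the port parsed from the bind endpoint.
//
//     let port = if requested_port == 0 {
//         PORT_MANAGER.get_ephemeral_port(SocketType::Tcp)?
//     } else {
//         requested_port
//     };
//     PORT_MANAGER.bind_port(SocketType::Tcp, port)?;
//     // ... later, on close/abort:
//     PORT_MANAGER.unbind_port(SocketType::Tcp, port);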

/// @brief The socket type.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum SocketType {
    /// A raw socket
    Raw,
    /// A socket used for TCP communication
    Tcp,
    /// A socket used for UDP communication
    Udp,
    /// A Unix-domain socket
    Unix,
}

bitflags! {
    /// @brief Socket options.
    #[derive(Default)]
    pub struct SocketOptions: u32 {
        /// Whether the socket is blocking
        const BLOCK = 1 << 0;
        /// Whether broadcast is allowed
        const BROADCAST = 1 << 1;
        /// Whether multicast is allowed
        const MULTICAST = 1 << 2;
        /// Whether address reuse is allowed
        const REUSEADDR = 1 << 3;
        /// Whether port reuse is allowed
        const REUSEPORT = 1 << 4;
    }
}

#[derive(Debug, Clone)]
/// @brief Returned by the metadata() method of trait Socket for external use.
pub struct SocketMetadata {
    /// The socket type
    pub socket_type: SocketType,
    /// Size of the receive buffer
    pub rx_buf_size: usize,
    /// Size of the send buffer
    pub tx_buf_size: usize,
    /// Size of the metadata buffer
    pub metadata_buf_size: usize,
    /// Socket options
    pub options: SocketOptions,
}

impl SocketMetadata {
    fn new(
        socket_type: SocketType,
        rx_buf_size: usize,
        tx_buf_size: usize,
        metadata_buf_size: usize,
        options: SocketOptions,
    ) -> Self {
        Self {
            socket_type,
            rx_buf_size,
            tx_buf_size,
            metadata_buf_size,
            options,
        }
    }
}
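
// Illustrative sketch (assumed values, not from this file): a concrete socket such as
// inet::TcpSocket would typically build its metadata once with buffer sizes of its choosing and
// hand out clones from Socket::metadata(). The sizes below are placeholders.
//
//     let metadata = SocketMetadata::new(
//         SocketType::Tcp,
//         65536, // rx_buf_size (placeholder)
//         65536, // tx_buf_size (placeholder)
//         4096,  // metadata_buf_size (placeholder)
//         SocketOptions::BLOCK | SocketOptions::REUSEADDR,
//     );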

/// @brief Address family enum.
///
/// Reference: https://code.dragonos.org.cn/xref/linux-5.19.10/include/linux/socket.h#180
#[derive(Debug, Clone, Copy, PartialEq, Eq, FromPrimitive, ToPrimitive)]
pub enum AddressFamily {
    /// AF_UNSPEC: unspecified address family
    Unspecified = 0,
    /// AF_UNIX: Unix domain sockets (same as AF_LOCAL)
    Unix = 1,
    /// AF_INET: IPv4 sockets
    INet = 2,
    /// AF_AX25: AMPR AX.25 sockets
    AX25 = 3,
    /// AF_IPX: IPX sockets
    IPX = 4,
    /// AF_APPLETALK: Appletalk sockets
    Appletalk = 5,
    /// AF_NETROM: AMPR NET/ROM sockets
    Netrom = 6,
    /// AF_BRIDGE: multiprotocol bridge sockets
    Bridge = 7,
    /// AF_ATMPVC: ATM PVC sockets
    Atmpvc = 8,
    /// AF_X25: X.25 sockets
    X25 = 9,
    /// AF_INET6: IPv6 sockets
    INet6 = 10,
    /// AF_ROSE: AMPR ROSE sockets
    Rose = 11,
    /// AF_DECnet: Reserved for DECnet project
    Decnet = 12,
    /// AF_NETBEUI: Reserved for 802.2LLC project
    Netbeui = 13,
    /// AF_SECURITY: pseudo AF for security callbacks
    Security = 14,
    /// AF_KEY: Key management API
    Key = 15,
    /// AF_NETLINK: Netlink sockets
    Netlink = 16,
    /// AF_PACKET: Low level packet interface
    Packet = 17,
    /// AF_ASH: Ash
    Ash = 18,
    /// AF_ECONET: Acorn Econet
    Econet = 19,
    /// AF_ATMSVC: ATM SVCs
    Atmsvc = 20,
    /// AF_RDS: Reliable Datagram Sockets
    Rds = 21,
    /// AF_SNA: Linux SNA Project
    Sna = 22,
    /// AF_IRDA: IRDA sockets
    Irda = 23,
    /// AF_PPPOX: PPPoX sockets
    Pppox = 24,
    /// AF_WANPIPE: WANPIPE API sockets
    WanPipe = 25,
    /// AF_LLC: Linux LLC
    Llc = 26,
    /// AF_IB: Native InfiniBand address
    /// See: https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/9/html-single/configuring_infiniband_and_rdma_networks/index#understanding-infiniband-and-rdma_configuring-infiniband-and-rdma-networks
    Ib = 27,
    /// AF_MPLS: MPLS
    Mpls = 28,
    /// AF_CAN: Controller Area Network
    Can = 29,
    /// AF_TIPC: TIPC sockets
    Tipc = 30,
    /// AF_BLUETOOTH: Bluetooth sockets
    Bluetooth = 31,
    /// AF_IUCV: IUCV sockets
    Iucv = 32,
    /// AF_RXRPC: RxRPC sockets
    Rxrpc = 33,
    /// AF_ISDN: mISDN sockets
    Isdn = 34,
    /// AF_PHONET: Phonet sockets
    Phonet = 35,
    /// AF_IEEE802154: IEEE 802.15.4 sockets
    Ieee802154 = 36,
    /// AF_CAIF: CAIF sockets
    Caif = 37,
    /// AF_ALG: Algorithm sockets
    Alg = 38,
    /// AF_NFC: NFC sockets
    Nfc = 39,
    /// AF_VSOCK: vSockets
    Vsock = 40,
    /// AF_KCM: Kernel Connection Multiplexor
    Kcm = 41,
    /// AF_QIPCRTR: Qualcomm IPC Router
    Qipcrtr = 42,
    /// AF_SMC: SMC-R sockets.
    /// reserve number for PF_SMC protocol family that reuses AF_INET address family
    Smc = 43,
    /// AF_XDP: XDP sockets
    Xdp = 44,
    /// AF_MCTP: Management Component Transport Protocol
    Mctp = 45,
    /// AF_MAX: the maximum address family value
    Max = 46,
}

impl TryFrom<u16> for AddressFamily {
    type Error = SystemError;
    fn try_from(x: u16) -> Result<Self, Self::Error> {
        use num_traits::FromPrimitive;
        return <Self as FromPrimitive>::from_u16(x).ok_or(SystemError::EINVAL);
    }
}

/// @brief POSIX socket type enum (the values match those used in the Linux kernel).
#[derive(Debug, Clone, Copy, PartialEq, Eq, FromPrimitive, ToPrimitive)]
pub enum PosixSocketType {
    Stream = 1,
    Datagram = 2,
    Raw = 3,
    Rdm = 4,
    SeqPacket = 5,
    Dccp = 6,
    Packet = 10,
}

impl TryFrom<u8> for PosixSocketType {
    type Error = SystemError;
    fn try_from(x: u8) -> Result<Self, Self::Error> {
        use num_traits::FromPrimitive;
        return <Self as FromPrimitive>::from_u8(x).ok_or(SystemError::EINVAL);
    }
}

/// ### Provides a lock-free poll method for sockets.
///
/// The NIC interrupt handler needs to poll socket state; under the current design, polling
/// through the socket file or its inode would inevitably deadlock, so this type is introduced
/// to provide a lock-free poll.
pub struct SocketPollMethod;

impl SocketPollMethod {
    pub fn poll(socket: &socket::Socket, handle_item: &SocketHandleItem) -> EPollEventType {
        let shutdown = handle_item.shutdown_type();
        match socket {
            socket::Socket::Udp(udp) => Self::udp_poll(udp, shutdown),
            socket::Socket::Tcp(tcp) => Self::tcp_poll(tcp, shutdown, handle_item.is_posix_listen),
            socket::Socket::Raw(raw) => Self::raw_poll(raw, shutdown),
            _ => todo!(),
        }
    }

    pub fn tcp_poll(
        socket: &tcp::Socket,
        shutdown: ShutdownType,
        is_posix_listen: bool,
    ) -> EPollEventType {
        let mut events = EPollEventType::empty();
        // debug!("enter tcp_poll! is_posix_listen:{}", is_posix_listen);
        // Handle listening sockets.
        if is_posix_listen {
            // A listening socket only reports EPOLLIN and EPOLLRDNORM.
            if socket.is_active() {
                events.insert(EPollEventType::EPOLL_LISTEN_CAN_ACCEPT);
            }

            // debug!("tcp_poll listen socket! events:{:?}", events);
            return events;
        }

        let state = socket.state();

        if shutdown == ShutdownType::SHUTDOWN_MASK || state == tcp::State::Closed {
            events.insert(EPollEventType::EPOLLHUP);
        }

        if shutdown.contains(ShutdownType::RCV_SHUTDOWN) {
            events.insert(
                EPollEventType::EPOLLIN | EPollEventType::EPOLLRDNORM | EPollEventType::EPOLLRDHUP,
            );
        }

        // Connected or passive Fast Open socket?
        if state != tcp::State::SynSent && state != tcp::State::SynReceived {
            // The socket has readable data.
            if socket.can_recv() {
                events.insert(EPollEventType::EPOLLIN | EPollEventType::EPOLLRDNORM);
            }

            if !(shutdown.contains(ShutdownType::SEND_SHUTDOWN)) {
                // The send buffer is writable (this writability check seems to differ from Linux).
                if socket.send_queue() < socket.send_capacity() {
                    events.insert(EPollEventType::EPOLLOUT | EPollEventType::EPOLLWRNORM);
                } else {
                    // TODO: raise SIGIO to signal that the buffer is full.
                    todo!("A signal SIGIO that the buffer is full needs to be sent");
                }
            } else {
                // If SEND_SHUTDOWN is set on our socket, the epoll event is EPOLLOUT.
                events.insert(EPollEventType::EPOLLOUT | EPollEventType::EPOLLWRNORM);
            }
        } else if state == tcp::State::SynSent {
            events.insert(EPollEventType::EPOLLOUT | EPollEventType::EPOLLWRNORM);
        }

        // The socket hit an error.
        // TODO: This logic may be wrong; it needs further verification whether
        // is_active() == false really means the socket is in an error state.
        if !socket.is_active() {
            events.insert(EPollEventType::EPOLLERR);
        }

        events
    }

    pub fn udp_poll(socket: &udp::Socket, shutdown: ShutdownType) -> EPollEventType {
        let mut event = EPollEventType::empty();

        if shutdown.contains(ShutdownType::RCV_SHUTDOWN) {
            event.insert(
                EPollEventType::EPOLLRDHUP | EPollEventType::EPOLLIN | EPollEventType::EPOLLRDNORM,
            );
        }
        if shutdown.contains(ShutdownType::SHUTDOWN_MASK) {
            event.insert(EPollEventType::EPOLLHUP);
        }

        if socket.can_recv() {
            event.insert(EPollEventType::EPOLLIN | EPollEventType::EPOLLRDNORM);
        }

        if socket.can_send() {
            event.insert(
                EPollEventType::EPOLLOUT
                    | EPollEventType::EPOLLWRNORM
                    | EPollEventType::EPOLLWRBAND,
            );
        } else {
            // TODO: not enough buffer space; this needs to be handled with a signal.
            todo!()
        }

        return event;
    }

    pub fn raw_poll(socket: &raw::Socket, shutdown: ShutdownType) -> EPollEventType {
        //debug!("enter raw_poll!");
        let mut event = EPollEventType::empty();

        if shutdown.contains(ShutdownType::RCV_SHUTDOWN) {
            event.insert(
                EPollEventType::EPOLLRDHUP | EPollEventType::EPOLLIN | EPollEventType::EPOLLRDNORM,
            );
        }
        if shutdown.contains(ShutdownType::SHUTDOWN_MASK) {
            event.insert(EPollEventType::EPOLLHUP);
        }

        if socket.can_recv() {
            //debug!("poll can recv!");
            event.insert(EPollEventType::EPOLLIN | EPollEventType::EPOLLRDNORM);
        } else {
            //debug!("poll can not recv!");
        }

        if socket.can_send() {
            //debug!("poll can send!");
            event.insert(
                EPollEventType::EPOLLOUT
                    | EPollEventType::EPOLLWRNORM
                    | EPollEventType::EPOLLWRBAND,
            );
        } else {
            //debug!("poll can not send!");
            // TODO: not enough buffer space; this needs to be handled with a signal.
            todo!()
        }
        return event;
    }
}
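
// Illustrative sketch (an assumption about the caller, not code from this module): after the NIC
// driver feeds packets into smoltcp, the polling path can recompute epoll events without touching
// the socket inode and wake any sleepers. `handle` and `smoltcp_socket` are placeholders for the
// values the caller already holds.
//
//     let handle_guard = HANDLE_MAP.write_irqsave();
//     if let Some(item) = handle_guard.get(&handle) {
//         let events = SocketPollMethod::poll(smoltcp_socket, item);
//         if let Some(posix_item) = item.posix_item() {
//             posix_item.wakeup_any(events.bits() as u64);
//         }
//     }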