1 mod bpf; 2 mod kprobe; 3 mod util; 4 5 use crate::filesystem::vfs::file::{File, FileMode, PageCache}; 6 use crate::filesystem::vfs::syscall::ModeType; 7 use crate::filesystem::vfs::{ 8 FilePrivateData, FileSystem, FileType, FsInfo, IndexNode, Metadata, SuperBlock, 9 }; 10 use crate::include::bindings::linux_bpf::{ 11 perf_event_attr, perf_event_sample_format, perf_sw_ids, perf_type_id, 12 }; 13 use crate::libs::casting::DowncastArc; 14 use crate::libs::spinlock::{SpinLock, SpinLockGuard}; 15 use crate::mm::fault::{PageFaultHandler, PageFaultMessage}; 16 use crate::mm::VmFaultReason; 17 use crate::net::event_poll::{EPollEventType, EPollItem, EventPoll, KernelIoctlData}; 18 use crate::perf::bpf::BpfPerfEvent; 19 use crate::perf::util::{PerfEventIoc, PerfEventOpenFlags, PerfProbeArgs}; 20 use crate::process::ProcessManager; 21 use crate::syscall::user_access::UserBufferReader; 22 use crate::syscall::Syscall; 23 use alloc::boxed::Box; 24 use alloc::collections::LinkedList; 25 use alloc::string::String; 26 use alloc::sync::{Arc, Weak}; 27 use alloc::vec::Vec; 28 use core::any::Any; 29 use core::ffi::c_void; 30 use core::fmt::Debug; 31 use core::ops::Deref; 32 use intertrait::{CastFrom, CastFromSync}; 33 use log::info; 34 use num_traits::FromPrimitive; 35 use system_error::SystemError; 36 37 type Result<T> = core::result::Result<T, SystemError>; 38 39 pub trait PerfEventOps: Send + Sync + Debug + CastFromSync + CastFrom + IndexNode { 40 /// Set the bpf program for the perf event 41 fn set_bpf_prog(&self, _bpf_prog: Arc<File>) -> Result<()> { 42 Err(SystemError::ENOSYS) 43 } 44 /// Enable the perf event 45 fn enable(&self) -> Result<()> { 46 Err(SystemError::ENOSYS) 47 } 48 /// Disable the perf event 49 fn disable(&self) -> Result<()> { 50 Err(SystemError::ENOSYS) 51 } 52 /// Whether the perf event is readable 53 fn readable(&self) -> bool; 54 } 55 56 #[derive(Debug)] 57 pub struct PerfEventInode { 58 event: Box<dyn PerfEventOps>, 59 epitems: SpinLock<LinkedList<Arc<EPollItem>>>, 60 } 61 62 impl PerfEventInode { 63 pub fn new(event: Box<dyn PerfEventOps>) -> Self { 64 Self { 65 event, 66 epitems: SpinLock::new(LinkedList::new()), 67 } 68 } 69 pub fn remove_epoll( 70 &self, 71 epoll: &Weak<SpinLock<EventPoll>>, 72 ) -> core::result::Result<(), SystemError> { 73 let is_remove = !self 74 .epitems 75 .lock_irqsave() 76 .extract_if(|x| x.epoll().ptr_eq(epoll)) 77 .collect::<Vec<_>>() 78 .is_empty(); 79 if is_remove { 80 return Ok(()); 81 } 82 Err(SystemError::ENOENT) 83 } 84 fn do_poll(&self) -> Result<usize> { 85 let mut events = EPollEventType::empty(); 86 if self.event.readable() { 87 events |= EPollEventType::EPOLLIN | EPollEventType::EPOLLRDNORM; 88 } 89 return Ok(events.bits() as usize); 90 } 91 fn epoll_callback(&self) -> Result<()> { 92 let pollflag = EPollEventType::from_bits_truncate(self.do_poll()? as u32); 93 // 唤醒epoll中等待的进程 94 EventPoll::wakeup_epoll(&self.epitems, Some(pollflag)) 95 } 96 } 97 98 impl Deref for PerfEventInode { 99 type Target = Box<dyn PerfEventOps>; 100 101 fn deref(&self) -> &Self::Target { 102 &self.event 103 } 104 } 105 106 impl IndexNode for PerfEventInode { 107 fn mmap(&self, start: usize, len: usize, offset: usize) -> Result<()> { 108 self.event.mmap(start, len, offset) 109 } 110 fn open(&self, _data: SpinLockGuard<FilePrivateData>, _mode: &FileMode) -> Result<()> { 111 Ok(()) 112 } 113 fn close(&self, _data: SpinLockGuard<FilePrivateData>) -> Result<()> { 114 Ok(()) 115 } 116 fn read_at( 117 &self, 118 _offset: usize, 119 _len: usize, 120 _buf: &mut [u8], 121 _data: SpinLockGuard<FilePrivateData>, 122 ) -> Result<usize> { 123 panic!("read_at not implemented for PerfEvent"); 124 } 125 126 fn write_at( 127 &self, 128 _offset: usize, 129 _len: usize, 130 _buf: &[u8], 131 _data: SpinLockGuard<FilePrivateData>, 132 ) -> Result<usize> { 133 panic!("write_at not implemented for PerfEvent"); 134 } 135 136 fn poll(&self, _private_data: &FilePrivateData) -> Result<usize> { 137 self.do_poll() 138 } 139 140 fn metadata(&self) -> Result<Metadata> { 141 let meta = Metadata { 142 mode: ModeType::from_bits_truncate(0o755), 143 file_type: FileType::File, 144 ..Default::default() 145 }; 146 Ok(meta) 147 } 148 149 fn resize(&self, _len: usize) -> Result<()> { 150 Ok(()) 151 } 152 153 fn ioctl(&self, cmd: u32, data: usize, _private_data: &FilePrivateData) -> Result<usize> { 154 let req = PerfEventIoc::from_u32(cmd).ok_or(SystemError::EINVAL)?; 155 info!("perf_event_ioctl: request: {:?}, arg: {}", req, data); 156 match req { 157 PerfEventIoc::Enable => { 158 self.event.enable()?; 159 Ok(0) 160 } 161 PerfEventIoc::Disable => { 162 self.event.disable()?; 163 Ok(0) 164 } 165 PerfEventIoc::SetBpf => { 166 info!("perf_event_ioctl: PERF_EVENT_IOC_SET_BPF, arg: {}", data); 167 let bpf_prog_fd = data; 168 let fd_table = ProcessManager::current_pcb().fd_table(); 169 let file = fd_table 170 .read() 171 .get_file_by_fd(bpf_prog_fd as _) 172 .ok_or(SystemError::EBADF)?; 173 self.event.set_bpf_prog(file)?; 174 Ok(0) 175 } 176 } 177 } 178 179 fn kernel_ioctl( 180 &self, 181 arg: Arc<dyn KernelIoctlData>, 182 _data: &FilePrivateData, 183 ) -> core::result::Result<usize, SystemError> { 184 let epitem = arg 185 .arc_any() 186 .downcast::<EPollItem>() 187 .map_err(|_| SystemError::EFAULT)?; 188 self.epitems.lock().push_back(epitem); 189 Ok(0) 190 } 191 192 fn fs(&self) -> Arc<dyn FileSystem> { 193 // panic!("PerfEvent does not have a filesystem") 194 Arc::new(PerfFakeFs) 195 } 196 fn as_any_ref(&self) -> &dyn Any { 197 self 198 } 199 fn list(&self) -> Result<Vec<String>> { 200 Err(SystemError::ENOSYS) 201 } 202 fn page_cache(&self) -> Option<Arc<PageCache>> { 203 self.event.page_cache() 204 } 205 } 206 207 #[derive(Debug)] 208 struct PerfFakeFs; 209 210 impl FileSystem for PerfFakeFs { 211 fn root_inode(&self) -> Arc<dyn IndexNode> { 212 panic!("PerfFakeFs does not have a root inode") 213 } 214 215 fn info(&self) -> FsInfo { 216 panic!("PerfFakeFs does not have a filesystem info") 217 } 218 219 fn as_any_ref(&self) -> &dyn Any { 220 self 221 } 222 223 fn name(&self) -> &str { 224 "perf" 225 } 226 227 fn super_block(&self) -> SuperBlock { 228 panic!("PerfFakeFs does not have a super block") 229 } 230 unsafe fn fault(&self, pfm: &mut PageFaultMessage) -> VmFaultReason { 231 let res = PageFaultHandler::filemap_fault(pfm); 232 res 233 } 234 unsafe fn map_pages( 235 &self, 236 pfm: &mut PageFaultMessage, 237 start_pgoff: usize, 238 end_pgoff: usize, 239 ) -> VmFaultReason { 240 PageFaultHandler::filemap_map_pages(pfm, start_pgoff, end_pgoff) 241 } 242 } 243 244 impl Syscall { 245 pub fn sys_perf_event_open( 246 attr: *const u8, 247 pid: i32, 248 cpu: i32, 249 group_fd: i32, 250 flags: u32, 251 ) -> Result<usize> { 252 let buf = UserBufferReader::new( 253 attr as *const perf_event_attr, 254 size_of::<perf_event_attr>(), 255 true, 256 )?; 257 let attr = buf.read_one_from_user(0)?; 258 perf_event_open(attr, pid, cpu, group_fd, flags) 259 } 260 } 261 262 pub fn perf_event_open( 263 attr: &perf_event_attr, 264 pid: i32, 265 cpu: i32, 266 group_fd: i32, 267 flags: u32, 268 ) -> Result<usize> { 269 let args = PerfProbeArgs::try_from(attr, pid, cpu, group_fd, flags)?; 270 log::info!("perf_event_process: {:#?}", args); 271 let file_mode = if args 272 .flags 273 .contains(PerfEventOpenFlags::PERF_FLAG_FD_CLOEXEC) 274 { 275 FileMode::O_RDWR | FileMode::O_CLOEXEC 276 } else { 277 FileMode::O_RDWR 278 }; 279 280 let event: Box<dyn PerfEventOps> = match args.type_ { 281 // Kprobe 282 // See /sys/bus/event_source/devices/kprobe/type 283 perf_type_id::PERF_TYPE_MAX => { 284 let kprobe_event = kprobe::perf_event_open_kprobe(args); 285 Box::new(kprobe_event) 286 } 287 perf_type_id::PERF_TYPE_SOFTWARE => { 288 // For bpf prog output 289 assert_eq!(args.config, perf_sw_ids::PERF_COUNT_SW_BPF_OUTPUT); 290 assert_eq!( 291 args.sample_type, 292 Some(perf_event_sample_format::PERF_SAMPLE_RAW) 293 ); 294 let bpf_event = bpf::perf_event_open_bpf(args); 295 Box::new(bpf_event) 296 } 297 _ => { 298 unimplemented!("perf_event_process: unknown type: {:?}", args); 299 } 300 }; 301 302 let page_cache = event.page_cache(); 303 let perf_event = Arc::new(PerfEventInode::new(event)); 304 if let Some(cache) = page_cache { 305 cache.set_inode(Arc::downgrade(&(perf_event.clone() as _)))?; 306 } 307 let file = File::new(perf_event, file_mode)?; 308 let fd_table = ProcessManager::current_pcb().fd_table(); 309 let fd = fd_table.write().alloc_fd(file, None).map(|x| x as usize)?; 310 Ok(fd) 311 } 312 313 pub fn perf_event_output(_ctx: *mut c_void, fd: usize, _flags: u32, data: &[u8]) -> Result<()> { 314 let file = get_perf_event_file(fd)?; 315 let bpf_event_file = file.deref().deref(); 316 let bpf_event_file = bpf_event_file 317 .deref() 318 .ref_any() 319 .downcast_ref::<BpfPerfEvent>() 320 .ok_or(SystemError::EINVAL)?; 321 bpf_event_file.write_event(data)?; 322 file.epoll_callback()?; 323 Ok(()) 324 } 325 326 fn get_perf_event_file(fd: usize) -> Result<Arc<PerfEventInode>> { 327 let fd_table = ProcessManager::current_pcb().fd_table(); 328 let file = fd_table 329 .read() 330 .get_file_by_fd(fd as _) 331 .ok_or(SystemError::EBADF)?; 332 let event = file 333 .inode() 334 .downcast_arc::<PerfEventInode>() 335 .ok_or(SystemError::EINVAL)?; 336 Ok(event) 337 } 338