xref: /DragonOS/kernel/src/perf/mod.rs (revision 7b0ef10895108a0de5ff5ef3d2f93f40cf2e33a5)
1 mod bpf;
2 mod kprobe;
3 mod util;
4 
5 use crate::filesystem::vfs::file::{File, FileMode, PageCache};
6 use crate::filesystem::vfs::syscall::ModeType;
7 use crate::filesystem::vfs::{
8     FilePrivateData, FileSystem, FileType, FsInfo, IndexNode, Metadata, SuperBlock,
9 };
10 use crate::include::bindings::linux_bpf::{
11     perf_event_attr, perf_event_sample_format, perf_sw_ids, perf_type_id,
12 };
13 use crate::libs::casting::DowncastArc;
14 use crate::libs::spinlock::{SpinLock, SpinLockGuard};
15 use crate::mm::fault::{PageFaultHandler, PageFaultMessage};
16 use crate::mm::VmFaultReason;
17 use crate::net::event_poll::{EPollEventType, EPollItem, EventPoll, KernelIoctlData};
18 use crate::perf::bpf::BpfPerfEvent;
19 use crate::perf::util::{PerfEventIoc, PerfEventOpenFlags, PerfProbeArgs};
20 use crate::process::ProcessManager;
21 use crate::syscall::user_access::UserBufferReader;
22 use crate::syscall::Syscall;
23 use alloc::boxed::Box;
24 use alloc::collections::LinkedList;
25 use alloc::string::String;
26 use alloc::sync::{Arc, Weak};
27 use alloc::vec::Vec;
28 use core::any::Any;
29 use core::ffi::c_void;
30 use core::fmt::Debug;
31 use core::ops::Deref;
32 use intertrait::{CastFrom, CastFromSync};
33 use log::info;
34 use num_traits::FromPrimitive;
35 use system_error::SystemError;
36 
/// Module-local shorthand: every fallible operation in this file reports a [`SystemError`].
type Result<T> = core::result::Result<T, SystemError>;
38 
/// Operations that a concrete perf event backend provides.
///
/// Implementors must also implement [`IndexNode`]; [`PerfEventInode`] forwards
/// `mmap` and `page_cache` to the backend through that supertrait.
///
/// Every method with a default body reports `ENOSYS`, so a backend only needs
/// to override the operations it actually supports.
pub trait PerfEventOps: Send + Sync + Debug + CastFromSync + CastFrom + IndexNode {
    /// Set the bpf program for the perf event
    ///
    /// The default implementation returns `ENOSYS`.
    fn set_bpf_prog(&self, _bpf_prog: Arc<File>) -> Result<()> {
        Err(SystemError::ENOSYS)
    }
    /// Enable the perf event
    ///
    /// The default implementation returns `ENOSYS`.
    fn enable(&self) -> Result<()> {
        Err(SystemError::ENOSYS)
    }
    /// Disable the perf event
    ///
    /// The default implementation returns `ENOSYS`.
    fn disable(&self) -> Result<()> {
        Err(SystemError::ENOSYS)
    }
    /// Whether the perf event is readable
    ///
    /// [`PerfEventInode`] reports `EPOLLIN | EPOLLRDNORM` to pollers when this
    /// returns `true`.
    fn readable(&self) -> bool;
}
55 
/// Inode that exposes a perf event backend as a file.
#[derive(Debug)]
pub struct PerfEventInode {
    /// The backend that implements the actual perf event behaviour.
    event: Box<dyn PerfEventOps>,
    /// Epoll items registered on this inode; appended by `kernel_ioctl`,
    /// removed by `remove_epoll`, and handed to `EventPoll::wakeup_epoll`.
    epitems: SpinLock<LinkedList<Arc<EPollItem>>>,
}
61 
62 impl PerfEventInode {
63     pub fn new(event: Box<dyn PerfEventOps>) -> Self {
64         Self {
65             event,
66             epitems: SpinLock::new(LinkedList::new()),
67         }
68     }
69     pub fn remove_epoll(
70         &self,
71         epoll: &Weak<SpinLock<EventPoll>>,
72     ) -> core::result::Result<(), SystemError> {
73         let is_remove = !self
74             .epitems
75             .lock_irqsave()
76             .extract_if(|x| x.epoll().ptr_eq(epoll))
77             .collect::<Vec<_>>()
78             .is_empty();
79         if is_remove {
80             return Ok(());
81         }
82         Err(SystemError::ENOENT)
83     }
84     fn do_poll(&self) -> Result<usize> {
85         let mut events = EPollEventType::empty();
86         if self.event.readable() {
87             events |= EPollEventType::EPOLLIN | EPollEventType::EPOLLRDNORM;
88         }
89         return Ok(events.bits() as usize);
90     }
91     fn epoll_callback(&self) -> Result<()> {
92         let pollflag = EPollEventType::from_bits_truncate(self.do_poll()? as u32);
93         // 唤醒epoll中等待的进程
94         EventPoll::wakeup_epoll(&self.epitems, Some(pollflag))
95     }
96 }
97 
/// Lets callers reach the wrapped perf event backend directly through the inode.
impl Deref for PerfEventInode {
    type Target = Box<dyn PerfEventOps>;

    /// Returns a reference to the boxed backend stored in `event`.
    fn deref(&self) -> &Self::Target {
        &self.event
    }
}
105 
106 impl IndexNode for PerfEventInode {
107     fn mmap(&self, start: usize, len: usize, offset: usize) -> Result<()> {
108         self.event.mmap(start, len, offset)
109     }
110     fn open(&self, _data: SpinLockGuard<FilePrivateData>, _mode: &FileMode) -> Result<()> {
111         Ok(())
112     }
113     fn close(&self, _data: SpinLockGuard<FilePrivateData>) -> Result<()> {
114         Ok(())
115     }
116     fn read_at(
117         &self,
118         _offset: usize,
119         _len: usize,
120         _buf: &mut [u8],
121         _data: SpinLockGuard<FilePrivateData>,
122     ) -> Result<usize> {
123         panic!("read_at not implemented for PerfEvent");
124     }
125 
126     fn write_at(
127         &self,
128         _offset: usize,
129         _len: usize,
130         _buf: &[u8],
131         _data: SpinLockGuard<FilePrivateData>,
132     ) -> Result<usize> {
133         panic!("write_at not implemented for PerfEvent");
134     }
135 
136     fn poll(&self, _private_data: &FilePrivateData) -> Result<usize> {
137         self.do_poll()
138     }
139 
140     fn metadata(&self) -> Result<Metadata> {
141         let meta = Metadata {
142             mode: ModeType::from_bits_truncate(0o755),
143             file_type: FileType::File,
144             ..Default::default()
145         };
146         Ok(meta)
147     }
148 
149     fn resize(&self, _len: usize) -> Result<()> {
150         Ok(())
151     }
152 
153     fn ioctl(&self, cmd: u32, data: usize, _private_data: &FilePrivateData) -> Result<usize> {
154         let req = PerfEventIoc::from_u32(cmd).ok_or(SystemError::EINVAL)?;
155         info!("perf_event_ioctl: request: {:?}, arg: {}", req, data);
156         match req {
157             PerfEventIoc::Enable => {
158                 self.event.enable()?;
159                 Ok(0)
160             }
161             PerfEventIoc::Disable => {
162                 self.event.disable()?;
163                 Ok(0)
164             }
165             PerfEventIoc::SetBpf => {
166                 info!("perf_event_ioctl: PERF_EVENT_IOC_SET_BPF, arg: {}", data);
167                 let bpf_prog_fd = data;
168                 let fd_table = ProcessManager::current_pcb().fd_table();
169                 let file = fd_table
170                     .read()
171                     .get_file_by_fd(bpf_prog_fd as _)
172                     .ok_or(SystemError::EBADF)?;
173                 self.event.set_bpf_prog(file)?;
174                 Ok(0)
175             }
176         }
177     }
178 
179     fn kernel_ioctl(
180         &self,
181         arg: Arc<dyn KernelIoctlData>,
182         _data: &FilePrivateData,
183     ) -> core::result::Result<usize, SystemError> {
184         let epitem = arg
185             .arc_any()
186             .downcast::<EPollItem>()
187             .map_err(|_| SystemError::EFAULT)?;
188         self.epitems.lock().push_back(epitem);
189         Ok(0)
190     }
191 
192     fn fs(&self) -> Arc<dyn FileSystem> {
193         // panic!("PerfEvent does not have a filesystem")
194         Arc::new(PerfFakeFs)
195     }
196     fn as_any_ref(&self) -> &dyn Any {
197         self
198     }
199     fn list(&self) -> Result<Vec<String>> {
200         Err(SystemError::ENOSYS)
201     }
202     fn page_cache(&self) -> Option<Arc<PageCache>> {
203         self.event.page_cache()
204     }
205 }
206 
/// Placeholder filesystem returned by `PerfEventInode::fs`; it carries no state.
#[derive(Debug)]
struct PerfFakeFs;
209 
210 impl FileSystem for PerfFakeFs {
211     fn root_inode(&self) -> Arc<dyn IndexNode> {
212         panic!("PerfFakeFs does not have a root inode")
213     }
214 
215     fn info(&self) -> FsInfo {
216         panic!("PerfFakeFs does not have a filesystem info")
217     }
218 
219     fn as_any_ref(&self) -> &dyn Any {
220         self
221     }
222 
223     fn name(&self) -> &str {
224         "perf"
225     }
226 
227     fn super_block(&self) -> SuperBlock {
228         panic!("PerfFakeFs does not have a super block")
229     }
230     unsafe fn fault(&self, pfm: &mut PageFaultMessage) -> VmFaultReason {
231         let res = PageFaultHandler::filemap_fault(pfm);
232         res
233     }
234     unsafe fn map_pages(
235         &self,
236         pfm: &mut PageFaultMessage,
237         start_pgoff: usize,
238         end_pgoff: usize,
239     ) -> VmFaultReason {
240         PageFaultHandler::filemap_map_pages(pfm, start_pgoff, end_pgoff)
241     }
242 }
243 
244 impl Syscall {
245     pub fn sys_perf_event_open(
246         attr: *const u8,
247         pid: i32,
248         cpu: i32,
249         group_fd: i32,
250         flags: u32,
251     ) -> Result<usize> {
252         let buf = UserBufferReader::new(
253             attr as *const perf_event_attr,
254             size_of::<perf_event_attr>(),
255             true,
256         )?;
257         let attr = buf.read_one_from_user(0)?;
258         perf_event_open(attr, pid, cpu, group_fd, flags)
259     }
260 }
261 
262 pub fn perf_event_open(
263     attr: &perf_event_attr,
264     pid: i32,
265     cpu: i32,
266     group_fd: i32,
267     flags: u32,
268 ) -> Result<usize> {
269     let args = PerfProbeArgs::try_from(attr, pid, cpu, group_fd, flags)?;
270     log::info!("perf_event_process: {:#?}", args);
271     let file_mode = if args
272         .flags
273         .contains(PerfEventOpenFlags::PERF_FLAG_FD_CLOEXEC)
274     {
275         FileMode::O_RDWR | FileMode::O_CLOEXEC
276     } else {
277         FileMode::O_RDWR
278     };
279 
280     let event: Box<dyn PerfEventOps> = match args.type_ {
281         // Kprobe
282         // See /sys/bus/event_source/devices/kprobe/type
283         perf_type_id::PERF_TYPE_MAX => {
284             let kprobe_event = kprobe::perf_event_open_kprobe(args);
285             Box::new(kprobe_event)
286         }
287         perf_type_id::PERF_TYPE_SOFTWARE => {
288             // For bpf prog output
289             assert_eq!(args.config, perf_sw_ids::PERF_COUNT_SW_BPF_OUTPUT);
290             assert_eq!(
291                 args.sample_type,
292                 Some(perf_event_sample_format::PERF_SAMPLE_RAW)
293             );
294             let bpf_event = bpf::perf_event_open_bpf(args);
295             Box::new(bpf_event)
296         }
297         _ => {
298             unimplemented!("perf_event_process: unknown type: {:?}", args);
299         }
300     };
301 
302     let page_cache = event.page_cache();
303     let perf_event = Arc::new(PerfEventInode::new(event));
304     if let Some(cache) = page_cache {
305         cache.set_inode(Arc::downgrade(&(perf_event.clone() as _)))?;
306     }
307     let file = File::new(perf_event, file_mode)?;
308     let fd_table = ProcessManager::current_pcb().fd_table();
309     let fd = fd_table.write().alloc_fd(file, None).map(|x| x as usize)?;
310     Ok(fd)
311 }
312 
313 pub fn perf_event_output(_ctx: *mut c_void, fd: usize, _flags: u32, data: &[u8]) -> Result<()> {
314     let file = get_perf_event_file(fd)?;
315     let bpf_event_file = file.deref().deref();
316     let bpf_event_file = bpf_event_file
317         .deref()
318         .ref_any()
319         .downcast_ref::<BpfPerfEvent>()
320         .ok_or(SystemError::EINVAL)?;
321     bpf_event_file.write_event(data)?;
322     file.epoll_callback()?;
323     Ok(())
324 }
325 
326 fn get_perf_event_file(fd: usize) -> Result<Arc<PerfEventInode>> {
327     let fd_table = ProcessManager::current_pcb().fd_table();
328     let file = fd_table
329         .read()
330         .get_file_by_fd(fd as _)
331         .ok_or(SystemError::EBADF)?;
332     let event = file
333         .inode()
334         .downcast_arc::<PerfEventInode>()
335         .ok_or(SystemError::EINVAL)?;
336     Ok(event)
337 }
338