1 mod bpf;
2 mod kprobe;
3 mod util;
4
5 use crate::filesystem::vfs::file::{File, FileMode, PageCache};
6 use crate::filesystem::vfs::syscall::ModeType;
7 use crate::filesystem::vfs::{
8 FilePrivateData, FileSystem, FileType, FsInfo, IndexNode, Metadata, SuperBlock,
9 };
10 use crate::include::bindings::linux_bpf::{
11 perf_event_attr, perf_event_sample_format, perf_sw_ids, perf_type_id,
12 };
13 use crate::libs::casting::DowncastArc;
14 use crate::libs::spinlock::{SpinLock, SpinLockGuard};
15 use crate::mm::fault::{PageFaultHandler, PageFaultMessage};
16 use crate::mm::VmFaultReason;
17 use crate::net::event_poll::{EPollEventType, EPollItem, EventPoll, KernelIoctlData};
18 use crate::perf::bpf::BpfPerfEvent;
19 use crate::perf::util::{PerfEventIoc, PerfEventOpenFlags, PerfProbeArgs};
20 use crate::process::ProcessManager;
21 use crate::syscall::user_access::UserBufferReader;
22 use crate::syscall::Syscall;
23 use alloc::boxed::Box;
24 use alloc::collections::LinkedList;
25 use alloc::string::String;
26 use alloc::sync::{Arc, Weak};
27 use alloc::vec::Vec;
28 use core::any::Any;
29 use core::ffi::c_void;
30 use core::fmt::Debug;
31 use core::ops::Deref;
32 use intertrait::{CastFrom, CastFromSync};
33 use log::info;
34 use num_traits::FromPrimitive;
35 use system_error::SystemError;
36
37 type Result<T> = core::result::Result<T, SystemError>;
38
39 pub trait PerfEventOps: Send + Sync + Debug + CastFromSync + CastFrom + IndexNode {
40 /// Set the bpf program for the perf event
set_bpf_prog(&self, _bpf_prog: Arc<File>) -> Result<()>41 fn set_bpf_prog(&self, _bpf_prog: Arc<File>) -> Result<()> {
42 Err(SystemError::ENOSYS)
43 }
44 /// Enable the perf event
enable(&self) -> Result<()>45 fn enable(&self) -> Result<()> {
46 Err(SystemError::ENOSYS)
47 }
48 /// Disable the perf event
disable(&self) -> Result<()>49 fn disable(&self) -> Result<()> {
50 Err(SystemError::ENOSYS)
51 }
52 /// Whether the perf event is readable
readable(&self) -> bool53 fn readable(&self) -> bool;
54 }
55
56 #[derive(Debug)]
57 pub struct PerfEventInode {
58 event: Box<dyn PerfEventOps>,
59 epitems: SpinLock<LinkedList<Arc<EPollItem>>>,
60 }
61
62 impl PerfEventInode {
new(event: Box<dyn PerfEventOps>) -> Self63 pub fn new(event: Box<dyn PerfEventOps>) -> Self {
64 Self {
65 event,
66 epitems: SpinLock::new(LinkedList::new()),
67 }
68 }
remove_epoll( &self, epoll: &Weak<SpinLock<EventPoll>>, ) -> core::result::Result<(), SystemError>69 pub fn remove_epoll(
70 &self,
71 epoll: &Weak<SpinLock<EventPoll>>,
72 ) -> core::result::Result<(), SystemError> {
73 let is_remove = !self
74 .epitems
75 .lock_irqsave()
76 .extract_if(|x| x.epoll().ptr_eq(epoll))
77 .collect::<Vec<_>>()
78 .is_empty();
79 if is_remove {
80 return Ok(());
81 }
82 Err(SystemError::ENOENT)
83 }
do_poll(&self) -> Result<usize>84 fn do_poll(&self) -> Result<usize> {
85 let mut events = EPollEventType::empty();
86 if self.event.readable() {
87 events |= EPollEventType::EPOLLIN | EPollEventType::EPOLLRDNORM;
88 }
89 return Ok(events.bits() as usize);
90 }
epoll_callback(&self) -> Result<()>91 fn epoll_callback(&self) -> Result<()> {
92 let pollflag = EPollEventType::from_bits_truncate(self.do_poll()? as u32);
93 // 唤醒epoll中等待的进程
94 EventPoll::wakeup_epoll(&self.epitems, Some(pollflag))
95 }
96 }
97
98 impl Deref for PerfEventInode {
99 type Target = Box<dyn PerfEventOps>;
100
deref(&self) -> &Self::Target101 fn deref(&self) -> &Self::Target {
102 &self.event
103 }
104 }
105
106 impl IndexNode for PerfEventInode {
mmap(&self, start: usize, len: usize, offset: usize) -> Result<()>107 fn mmap(&self, start: usize, len: usize, offset: usize) -> Result<()> {
108 self.event.mmap(start, len, offset)
109 }
open(&self, _data: SpinLockGuard<FilePrivateData>, _mode: &FileMode) -> Result<()>110 fn open(&self, _data: SpinLockGuard<FilePrivateData>, _mode: &FileMode) -> Result<()> {
111 Ok(())
112 }
close(&self, _data: SpinLockGuard<FilePrivateData>) -> Result<()>113 fn close(&self, _data: SpinLockGuard<FilePrivateData>) -> Result<()> {
114 Ok(())
115 }
read_at( &self, _offset: usize, _len: usize, _buf: &mut [u8], _data: SpinLockGuard<FilePrivateData>, ) -> Result<usize>116 fn read_at(
117 &self,
118 _offset: usize,
119 _len: usize,
120 _buf: &mut [u8],
121 _data: SpinLockGuard<FilePrivateData>,
122 ) -> Result<usize> {
123 panic!("read_at not implemented for PerfEvent");
124 }
125
write_at( &self, _offset: usize, _len: usize, _buf: &[u8], _data: SpinLockGuard<FilePrivateData>, ) -> Result<usize>126 fn write_at(
127 &self,
128 _offset: usize,
129 _len: usize,
130 _buf: &[u8],
131 _data: SpinLockGuard<FilePrivateData>,
132 ) -> Result<usize> {
133 panic!("write_at not implemented for PerfEvent");
134 }
135
poll(&self, _private_data: &FilePrivateData) -> Result<usize>136 fn poll(&self, _private_data: &FilePrivateData) -> Result<usize> {
137 self.do_poll()
138 }
139
metadata(&self) -> Result<Metadata>140 fn metadata(&self) -> Result<Metadata> {
141 let meta = Metadata {
142 mode: ModeType::from_bits_truncate(0o755),
143 file_type: FileType::File,
144 ..Default::default()
145 };
146 Ok(meta)
147 }
148
resize(&self, _len: usize) -> Result<()>149 fn resize(&self, _len: usize) -> Result<()> {
150 Ok(())
151 }
152
ioctl(&self, cmd: u32, data: usize, _private_data: &FilePrivateData) -> Result<usize>153 fn ioctl(&self, cmd: u32, data: usize, _private_data: &FilePrivateData) -> Result<usize> {
154 let req = PerfEventIoc::from_u32(cmd).ok_or(SystemError::EINVAL)?;
155 info!("perf_event_ioctl: request: {:?}, arg: {}", req, data);
156 match req {
157 PerfEventIoc::Enable => {
158 self.event.enable()?;
159 Ok(0)
160 }
161 PerfEventIoc::Disable => {
162 self.event.disable()?;
163 Ok(0)
164 }
165 PerfEventIoc::SetBpf => {
166 info!("perf_event_ioctl: PERF_EVENT_IOC_SET_BPF, arg: {}", data);
167 let bpf_prog_fd = data;
168 let fd_table = ProcessManager::current_pcb().fd_table();
169 let file = fd_table
170 .read()
171 .get_file_by_fd(bpf_prog_fd as _)
172 .ok_or(SystemError::EBADF)?;
173 self.event.set_bpf_prog(file)?;
174 Ok(0)
175 }
176 }
177 }
178
kernel_ioctl( &self, arg: Arc<dyn KernelIoctlData>, _data: &FilePrivateData, ) -> core::result::Result<usize, SystemError>179 fn kernel_ioctl(
180 &self,
181 arg: Arc<dyn KernelIoctlData>,
182 _data: &FilePrivateData,
183 ) -> core::result::Result<usize, SystemError> {
184 let epitem = arg
185 .arc_any()
186 .downcast::<EPollItem>()
187 .map_err(|_| SystemError::EFAULT)?;
188 self.epitems.lock().push_back(epitem);
189 Ok(0)
190 }
191
fs(&self) -> Arc<dyn FileSystem>192 fn fs(&self) -> Arc<dyn FileSystem> {
193 // panic!("PerfEvent does not have a filesystem")
194 Arc::new(PerfFakeFs)
195 }
as_any_ref(&self) -> &dyn Any196 fn as_any_ref(&self) -> &dyn Any {
197 self
198 }
list(&self) -> Result<Vec<String>>199 fn list(&self) -> Result<Vec<String>> {
200 Err(SystemError::ENOSYS)
201 }
page_cache(&self) -> Option<Arc<PageCache>>202 fn page_cache(&self) -> Option<Arc<PageCache>> {
203 self.event.page_cache()
204 }
205 }
206
/// Placeholder filesystem returned by `PerfEventInode::fs`.
#[derive(Debug)]
struct PerfFakeFs;
209
210 impl FileSystem for PerfFakeFs {
root_inode(&self) -> Arc<dyn IndexNode>211 fn root_inode(&self) -> Arc<dyn IndexNode> {
212 panic!("PerfFakeFs does not have a root inode")
213 }
214
info(&self) -> FsInfo215 fn info(&self) -> FsInfo {
216 panic!("PerfFakeFs does not have a filesystem info")
217 }
218
as_any_ref(&self) -> &dyn Any219 fn as_any_ref(&self) -> &dyn Any {
220 self
221 }
222
name(&self) -> &str223 fn name(&self) -> &str {
224 "perf"
225 }
226
super_block(&self) -> SuperBlock227 fn super_block(&self) -> SuperBlock {
228 panic!("PerfFakeFs does not have a super block")
229 }
fault(&self, pfm: &mut PageFaultMessage) -> VmFaultReason230 unsafe fn fault(&self, pfm: &mut PageFaultMessage) -> VmFaultReason {
231 let res = PageFaultHandler::filemap_fault(pfm);
232 res
233 }
map_pages( &self, pfm: &mut PageFaultMessage, start_pgoff: usize, end_pgoff: usize, ) -> VmFaultReason234 unsafe fn map_pages(
235 &self,
236 pfm: &mut PageFaultMessage,
237 start_pgoff: usize,
238 end_pgoff: usize,
239 ) -> VmFaultReason {
240 PageFaultHandler::filemap_map_pages(pfm, start_pgoff, end_pgoff)
241 }
242 }
243
244 impl Syscall {
sys_perf_event_open( attr: *const u8, pid: i32, cpu: i32, group_fd: i32, flags: u32, ) -> Result<usize>245 pub fn sys_perf_event_open(
246 attr: *const u8,
247 pid: i32,
248 cpu: i32,
249 group_fd: i32,
250 flags: u32,
251 ) -> Result<usize> {
252 let buf = UserBufferReader::new(
253 attr as *const perf_event_attr,
254 size_of::<perf_event_attr>(),
255 true,
256 )?;
257 let attr = buf.read_one_from_user(0)?;
258 perf_event_open(attr, pid, cpu, group_fd, flags)
259 }
260 }
261
perf_event_open( attr: &perf_event_attr, pid: i32, cpu: i32, group_fd: i32, flags: u32, ) -> Result<usize>262 pub fn perf_event_open(
263 attr: &perf_event_attr,
264 pid: i32,
265 cpu: i32,
266 group_fd: i32,
267 flags: u32,
268 ) -> Result<usize> {
269 let args = PerfProbeArgs::try_from(attr, pid, cpu, group_fd, flags)?;
270 log::info!("perf_event_process: {:#?}", args);
271 let file_mode = if args
272 .flags
273 .contains(PerfEventOpenFlags::PERF_FLAG_FD_CLOEXEC)
274 {
275 FileMode::O_RDWR | FileMode::O_CLOEXEC
276 } else {
277 FileMode::O_RDWR
278 };
279
280 let event: Box<dyn PerfEventOps> = match args.type_ {
281 // Kprobe
282 // See /sys/bus/event_source/devices/kprobe/type
283 perf_type_id::PERF_TYPE_MAX => {
284 let kprobe_event = kprobe::perf_event_open_kprobe(args);
285 Box::new(kprobe_event)
286 }
287 perf_type_id::PERF_TYPE_SOFTWARE => {
288 // For bpf prog output
289 assert_eq!(args.config, perf_sw_ids::PERF_COUNT_SW_BPF_OUTPUT);
290 assert_eq!(
291 args.sample_type,
292 Some(perf_event_sample_format::PERF_SAMPLE_RAW)
293 );
294 let bpf_event = bpf::perf_event_open_bpf(args);
295 Box::new(bpf_event)
296 }
297 _ => {
298 unimplemented!("perf_event_process: unknown type: {:?}", args);
299 }
300 };
301
302 let page_cache = event.page_cache();
303 let perf_event = Arc::new(PerfEventInode::new(event));
304 if let Some(cache) = page_cache {
305 cache.set_inode(Arc::downgrade(&(perf_event.clone() as _)))?;
306 }
307 let file = File::new(perf_event, file_mode)?;
308 let fd_table = ProcessManager::current_pcb().fd_table();
309 let fd = fd_table.write().alloc_fd(file, None).map(|x| x as usize)?;
310 Ok(fd)
311 }
312
perf_event_output(_ctx: *mut c_void, fd: usize, _flags: u32, data: &[u8]) -> Result<()>313 pub fn perf_event_output(_ctx: *mut c_void, fd: usize, _flags: u32, data: &[u8]) -> Result<()> {
314 let file = get_perf_event_file(fd)?;
315 let bpf_event_file = file.deref().deref();
316 let bpf_event_file = bpf_event_file
317 .deref()
318 .ref_any()
319 .downcast_ref::<BpfPerfEvent>()
320 .ok_or(SystemError::EINVAL)?;
321 bpf_event_file.write_event(data)?;
322 file.epoll_callback()?;
323 Ok(())
324 }
325
get_perf_event_file(fd: usize) -> Result<Arc<PerfEventInode>>326 fn get_perf_event_file(fd: usize) -> Result<Arc<PerfEventInode>> {
327 let fd_table = ProcessManager::current_pcb().fd_table();
328 let file = fd_table
329 .read()
330 .get_file_by_fd(fd as _)
331 .ok_or(SystemError::EBADF)?;
332 let event = file
333 .inode()
334 .downcast_arc::<PerfEventInode>()
335 .ok_or(SystemError::EINVAL)?;
336 Ok(event)
337 }
338