1*fae6e9adSlinfeng mod bpf;
2*fae6e9adSlinfeng mod kprobe;
3*fae6e9adSlinfeng mod util;
4*fae6e9adSlinfeng
5*fae6e9adSlinfeng use crate::filesystem::vfs::file::{File, FileMode, PageCache};
6*fae6e9adSlinfeng use crate::filesystem::vfs::syscall::ModeType;
7*fae6e9adSlinfeng use crate::filesystem::vfs::{
8*fae6e9adSlinfeng FilePrivateData, FileSystem, FileType, FsInfo, IndexNode, Metadata, SuperBlock,
9*fae6e9adSlinfeng };
10*fae6e9adSlinfeng use crate::include::bindings::linux_bpf::{
11*fae6e9adSlinfeng perf_event_attr, perf_event_sample_format, perf_sw_ids, perf_type_id,
12*fae6e9adSlinfeng };
13*fae6e9adSlinfeng use crate::libs::casting::DowncastArc;
14*fae6e9adSlinfeng use crate::libs::spinlock::{SpinLock, SpinLockGuard};
15*fae6e9adSlinfeng use crate::mm::fault::{PageFaultHandler, PageFaultMessage};
16*fae6e9adSlinfeng use crate::mm::VmFaultReason;
17*fae6e9adSlinfeng use crate::net::event_poll::{EPollEventType, EPollItem, EventPoll, KernelIoctlData};
18*fae6e9adSlinfeng use crate::perf::bpf::BpfPerfEvent;
19*fae6e9adSlinfeng use crate::perf::util::{PerfEventIoc, PerfEventOpenFlags, PerfProbeArgs};
20*fae6e9adSlinfeng use crate::process::ProcessManager;
21*fae6e9adSlinfeng use crate::syscall::user_access::UserBufferReader;
22*fae6e9adSlinfeng use crate::syscall::Syscall;
23*fae6e9adSlinfeng use alloc::boxed::Box;
24*fae6e9adSlinfeng use alloc::collections::LinkedList;
25*fae6e9adSlinfeng use alloc::string::String;
26*fae6e9adSlinfeng use alloc::sync::{Arc, Weak};
27*fae6e9adSlinfeng use alloc::vec::Vec;
28*fae6e9adSlinfeng use core::any::Any;
29*fae6e9adSlinfeng use core::ffi::c_void;
30*fae6e9adSlinfeng use core::fmt::Debug;
31*fae6e9adSlinfeng use core::ops::Deref;
32*fae6e9adSlinfeng use intertrait::{CastFrom, CastFromSync};
33*fae6e9adSlinfeng use log::info;
34*fae6e9adSlinfeng use num_traits::FromPrimitive;
35*fae6e9adSlinfeng use system_error::SystemError;
36*fae6e9adSlinfeng
37*fae6e9adSlinfeng type Result<T> = core::result::Result<T, SystemError>;
38*fae6e9adSlinfeng
39*fae6e9adSlinfeng pub trait PerfEventOps: Send + Sync + Debug + CastFromSync + CastFrom + IndexNode {
40*fae6e9adSlinfeng /// Set the bpf program for the perf event
set_bpf_prog(&self, _bpf_prog: Arc<File>) -> Result<()>41*fae6e9adSlinfeng fn set_bpf_prog(&self, _bpf_prog: Arc<File>) -> Result<()> {
42*fae6e9adSlinfeng Err(SystemError::ENOSYS)
43*fae6e9adSlinfeng }
44*fae6e9adSlinfeng /// Enable the perf event
enable(&self) -> Result<()>45*fae6e9adSlinfeng fn enable(&self) -> Result<()> {
46*fae6e9adSlinfeng Err(SystemError::ENOSYS)
47*fae6e9adSlinfeng }
48*fae6e9adSlinfeng /// Disable the perf event
disable(&self) -> Result<()>49*fae6e9adSlinfeng fn disable(&self) -> Result<()> {
50*fae6e9adSlinfeng Err(SystemError::ENOSYS)
51*fae6e9adSlinfeng }
52*fae6e9adSlinfeng /// Whether the perf event is readable
readable(&self) -> bool53*fae6e9adSlinfeng fn readable(&self) -> bool;
54*fae6e9adSlinfeng }
55*fae6e9adSlinfeng
56*fae6e9adSlinfeng #[derive(Debug)]
57*fae6e9adSlinfeng pub struct PerfEventInode {
58*fae6e9adSlinfeng event: Box<dyn PerfEventOps>,
59*fae6e9adSlinfeng epitems: SpinLock<LinkedList<Arc<EPollItem>>>,
60*fae6e9adSlinfeng }
61*fae6e9adSlinfeng
62*fae6e9adSlinfeng impl PerfEventInode {
new(event: Box<dyn PerfEventOps>) -> Self63*fae6e9adSlinfeng pub fn new(event: Box<dyn PerfEventOps>) -> Self {
64*fae6e9adSlinfeng Self {
65*fae6e9adSlinfeng event,
66*fae6e9adSlinfeng epitems: SpinLock::new(LinkedList::new()),
67*fae6e9adSlinfeng }
68*fae6e9adSlinfeng }
remove_epoll( &self, epoll: &Weak<SpinLock<EventPoll>>, ) -> core::result::Result<(), SystemError>69*fae6e9adSlinfeng pub fn remove_epoll(
70*fae6e9adSlinfeng &self,
71*fae6e9adSlinfeng epoll: &Weak<SpinLock<EventPoll>>,
72*fae6e9adSlinfeng ) -> core::result::Result<(), SystemError> {
73*fae6e9adSlinfeng let is_remove = !self
74*fae6e9adSlinfeng .epitems
75*fae6e9adSlinfeng .lock_irqsave()
76*fae6e9adSlinfeng .extract_if(|x| x.epoll().ptr_eq(epoll))
77*fae6e9adSlinfeng .collect::<Vec<_>>()
78*fae6e9adSlinfeng .is_empty();
79*fae6e9adSlinfeng if is_remove {
80*fae6e9adSlinfeng return Ok(());
81*fae6e9adSlinfeng }
82*fae6e9adSlinfeng Err(SystemError::ENOENT)
83*fae6e9adSlinfeng }
do_poll(&self) -> Result<usize>84*fae6e9adSlinfeng fn do_poll(&self) -> Result<usize> {
85*fae6e9adSlinfeng let mut events = EPollEventType::empty();
86*fae6e9adSlinfeng if self.event.readable() {
87*fae6e9adSlinfeng events |= EPollEventType::EPOLLIN | EPollEventType::EPOLLRDNORM;
88*fae6e9adSlinfeng }
89*fae6e9adSlinfeng return Ok(events.bits() as usize);
90*fae6e9adSlinfeng }
epoll_callback(&self) -> Result<()>91*fae6e9adSlinfeng fn epoll_callback(&self) -> Result<()> {
92*fae6e9adSlinfeng let pollflag = EPollEventType::from_bits_truncate(self.do_poll()? as u32);
93*fae6e9adSlinfeng // 唤醒epoll中等待的进程
94*fae6e9adSlinfeng EventPoll::wakeup_epoll(&self.epitems, Some(pollflag))
95*fae6e9adSlinfeng }
96*fae6e9adSlinfeng }
97*fae6e9adSlinfeng
98*fae6e9adSlinfeng impl Deref for PerfEventInode {
99*fae6e9adSlinfeng type Target = Box<dyn PerfEventOps>;
100*fae6e9adSlinfeng
deref(&self) -> &Self::Target101*fae6e9adSlinfeng fn deref(&self) -> &Self::Target {
102*fae6e9adSlinfeng &self.event
103*fae6e9adSlinfeng }
104*fae6e9adSlinfeng }
105*fae6e9adSlinfeng
106*fae6e9adSlinfeng impl IndexNode for PerfEventInode {
mmap(&self, start: usize, len: usize, offset: usize) -> Result<()>107*fae6e9adSlinfeng fn mmap(&self, start: usize, len: usize, offset: usize) -> Result<()> {
108*fae6e9adSlinfeng self.event.mmap(start, len, offset)
109*fae6e9adSlinfeng }
open(&self, _data: SpinLockGuard<FilePrivateData>, _mode: &FileMode) -> Result<()>110*fae6e9adSlinfeng fn open(&self, _data: SpinLockGuard<FilePrivateData>, _mode: &FileMode) -> Result<()> {
111*fae6e9adSlinfeng Ok(())
112*fae6e9adSlinfeng }
close(&self, _data: SpinLockGuard<FilePrivateData>) -> Result<()>113*fae6e9adSlinfeng fn close(&self, _data: SpinLockGuard<FilePrivateData>) -> Result<()> {
114*fae6e9adSlinfeng Ok(())
115*fae6e9adSlinfeng }
read_at( &self, _offset: usize, _len: usize, _buf: &mut [u8], _data: SpinLockGuard<FilePrivateData>, ) -> Result<usize>116*fae6e9adSlinfeng fn read_at(
117*fae6e9adSlinfeng &self,
118*fae6e9adSlinfeng _offset: usize,
119*fae6e9adSlinfeng _len: usize,
120*fae6e9adSlinfeng _buf: &mut [u8],
121*fae6e9adSlinfeng _data: SpinLockGuard<FilePrivateData>,
122*fae6e9adSlinfeng ) -> Result<usize> {
123*fae6e9adSlinfeng panic!("read_at not implemented for PerfEvent");
124*fae6e9adSlinfeng }
125*fae6e9adSlinfeng
write_at( &self, _offset: usize, _len: usize, _buf: &[u8], _data: SpinLockGuard<FilePrivateData>, ) -> Result<usize>126*fae6e9adSlinfeng fn write_at(
127*fae6e9adSlinfeng &self,
128*fae6e9adSlinfeng _offset: usize,
129*fae6e9adSlinfeng _len: usize,
130*fae6e9adSlinfeng _buf: &[u8],
131*fae6e9adSlinfeng _data: SpinLockGuard<FilePrivateData>,
132*fae6e9adSlinfeng ) -> Result<usize> {
133*fae6e9adSlinfeng panic!("write_at not implemented for PerfEvent");
134*fae6e9adSlinfeng }
135*fae6e9adSlinfeng
poll(&self, _private_data: &FilePrivateData) -> Result<usize>136*fae6e9adSlinfeng fn poll(&self, _private_data: &FilePrivateData) -> Result<usize> {
137*fae6e9adSlinfeng self.do_poll()
138*fae6e9adSlinfeng }
139*fae6e9adSlinfeng
metadata(&self) -> Result<Metadata>140*fae6e9adSlinfeng fn metadata(&self) -> Result<Metadata> {
141*fae6e9adSlinfeng let meta = Metadata {
142*fae6e9adSlinfeng mode: ModeType::from_bits_truncate(0o755),
143*fae6e9adSlinfeng file_type: FileType::File,
144*fae6e9adSlinfeng ..Default::default()
145*fae6e9adSlinfeng };
146*fae6e9adSlinfeng Ok(meta)
147*fae6e9adSlinfeng }
148*fae6e9adSlinfeng
resize(&self, _len: usize) -> Result<()>149*fae6e9adSlinfeng fn resize(&self, _len: usize) -> Result<()> {
150*fae6e9adSlinfeng Ok(())
151*fae6e9adSlinfeng }
152*fae6e9adSlinfeng
ioctl(&self, cmd: u32, data: usize, _private_data: &FilePrivateData) -> Result<usize>153*fae6e9adSlinfeng fn ioctl(&self, cmd: u32, data: usize, _private_data: &FilePrivateData) -> Result<usize> {
154*fae6e9adSlinfeng let req = PerfEventIoc::from_u32(cmd).ok_or(SystemError::EINVAL)?;
155*fae6e9adSlinfeng info!("perf_event_ioctl: request: {:?}, arg: {}", req, data);
156*fae6e9adSlinfeng match req {
157*fae6e9adSlinfeng PerfEventIoc::Enable => {
158*fae6e9adSlinfeng self.event.enable()?;
159*fae6e9adSlinfeng Ok(0)
160*fae6e9adSlinfeng }
161*fae6e9adSlinfeng PerfEventIoc::Disable => {
162*fae6e9adSlinfeng self.event.disable()?;
163*fae6e9adSlinfeng Ok(0)
164*fae6e9adSlinfeng }
165*fae6e9adSlinfeng PerfEventIoc::SetBpf => {
166*fae6e9adSlinfeng info!("perf_event_ioctl: PERF_EVENT_IOC_SET_BPF, arg: {}", data);
167*fae6e9adSlinfeng let bpf_prog_fd = data;
168*fae6e9adSlinfeng let fd_table = ProcessManager::current_pcb().fd_table();
169*fae6e9adSlinfeng let file = fd_table
170*fae6e9adSlinfeng .read()
171*fae6e9adSlinfeng .get_file_by_fd(bpf_prog_fd as _)
172*fae6e9adSlinfeng .ok_or(SystemError::EBADF)?;
173*fae6e9adSlinfeng self.event.set_bpf_prog(file)?;
174*fae6e9adSlinfeng Ok(0)
175*fae6e9adSlinfeng }
176*fae6e9adSlinfeng }
177*fae6e9adSlinfeng }
178*fae6e9adSlinfeng
kernel_ioctl( &self, arg: Arc<dyn KernelIoctlData>, _data: &FilePrivateData, ) -> core::result::Result<usize, SystemError>179*fae6e9adSlinfeng fn kernel_ioctl(
180*fae6e9adSlinfeng &self,
181*fae6e9adSlinfeng arg: Arc<dyn KernelIoctlData>,
182*fae6e9adSlinfeng _data: &FilePrivateData,
183*fae6e9adSlinfeng ) -> core::result::Result<usize, SystemError> {
184*fae6e9adSlinfeng let epitem = arg
185*fae6e9adSlinfeng .arc_any()
186*fae6e9adSlinfeng .downcast::<EPollItem>()
187*fae6e9adSlinfeng .map_err(|_| SystemError::EFAULT)?;
188*fae6e9adSlinfeng self.epitems.lock().push_back(epitem);
189*fae6e9adSlinfeng Ok(0)
190*fae6e9adSlinfeng }
191*fae6e9adSlinfeng
fs(&self) -> Arc<dyn FileSystem>192*fae6e9adSlinfeng fn fs(&self) -> Arc<dyn FileSystem> {
193*fae6e9adSlinfeng // panic!("PerfEvent does not have a filesystem")
194*fae6e9adSlinfeng Arc::new(PerfFakeFs)
195*fae6e9adSlinfeng }
as_any_ref(&self) -> &dyn Any196*fae6e9adSlinfeng fn as_any_ref(&self) -> &dyn Any {
197*fae6e9adSlinfeng self
198*fae6e9adSlinfeng }
list(&self) -> Result<Vec<String>>199*fae6e9adSlinfeng fn list(&self) -> Result<Vec<String>> {
200*fae6e9adSlinfeng Err(SystemError::ENOSYS)
201*fae6e9adSlinfeng }
page_cache(&self) -> Option<Arc<PageCache>>202*fae6e9adSlinfeng fn page_cache(&self) -> Option<Arc<PageCache>> {
203*fae6e9adSlinfeng self.event.page_cache()
204*fae6e9adSlinfeng }
205*fae6e9adSlinfeng }
206*fae6e9adSlinfeng
/// Placeholder filesystem handed out by `PerfEventInode::fs`.
#[derive(Debug)]
struct PerfFakeFs;
209*fae6e9adSlinfeng
210*fae6e9adSlinfeng impl FileSystem for PerfFakeFs {
root_inode(&self) -> Arc<dyn IndexNode>211*fae6e9adSlinfeng fn root_inode(&self) -> Arc<dyn IndexNode> {
212*fae6e9adSlinfeng panic!("PerfFakeFs does not have a root inode")
213*fae6e9adSlinfeng }
214*fae6e9adSlinfeng
info(&self) -> FsInfo215*fae6e9adSlinfeng fn info(&self) -> FsInfo {
216*fae6e9adSlinfeng panic!("PerfFakeFs does not have a filesystem info")
217*fae6e9adSlinfeng }
218*fae6e9adSlinfeng
as_any_ref(&self) -> &dyn Any219*fae6e9adSlinfeng fn as_any_ref(&self) -> &dyn Any {
220*fae6e9adSlinfeng self
221*fae6e9adSlinfeng }
222*fae6e9adSlinfeng
name(&self) -> &str223*fae6e9adSlinfeng fn name(&self) -> &str {
224*fae6e9adSlinfeng "perf"
225*fae6e9adSlinfeng }
226*fae6e9adSlinfeng
super_block(&self) -> SuperBlock227*fae6e9adSlinfeng fn super_block(&self) -> SuperBlock {
228*fae6e9adSlinfeng panic!("PerfFakeFs does not have a super block")
229*fae6e9adSlinfeng }
fault(&self, pfm: &mut PageFaultMessage) -> VmFaultReason230*fae6e9adSlinfeng unsafe fn fault(&self, pfm: &mut PageFaultMessage) -> VmFaultReason {
231*fae6e9adSlinfeng let res = PageFaultHandler::filemap_fault(pfm);
232*fae6e9adSlinfeng res
233*fae6e9adSlinfeng }
map_pages( &self, pfm: &mut PageFaultMessage, start_pgoff: usize, end_pgoff: usize, ) -> VmFaultReason234*fae6e9adSlinfeng unsafe fn map_pages(
235*fae6e9adSlinfeng &self,
236*fae6e9adSlinfeng pfm: &mut PageFaultMessage,
237*fae6e9adSlinfeng start_pgoff: usize,
238*fae6e9adSlinfeng end_pgoff: usize,
239*fae6e9adSlinfeng ) -> VmFaultReason {
240*fae6e9adSlinfeng PageFaultHandler::filemap_map_pages(pfm, start_pgoff, end_pgoff)
241*fae6e9adSlinfeng }
242*fae6e9adSlinfeng }
243*fae6e9adSlinfeng
244*fae6e9adSlinfeng impl Syscall {
sys_perf_event_open( attr: *const u8, pid: i32, cpu: i32, group_fd: i32, flags: u32, ) -> Result<usize>245*fae6e9adSlinfeng pub fn sys_perf_event_open(
246*fae6e9adSlinfeng attr: *const u8,
247*fae6e9adSlinfeng pid: i32,
248*fae6e9adSlinfeng cpu: i32,
249*fae6e9adSlinfeng group_fd: i32,
250*fae6e9adSlinfeng flags: u32,
251*fae6e9adSlinfeng ) -> Result<usize> {
252*fae6e9adSlinfeng let buf = UserBufferReader::new(
253*fae6e9adSlinfeng attr as *const perf_event_attr,
254*fae6e9adSlinfeng size_of::<perf_event_attr>(),
255*fae6e9adSlinfeng true,
256*fae6e9adSlinfeng )?;
257*fae6e9adSlinfeng let attr = buf.read_one_from_user(0)?;
258*fae6e9adSlinfeng perf_event_open(attr, pid, cpu, group_fd, flags)
259*fae6e9adSlinfeng }
260*fae6e9adSlinfeng }
261*fae6e9adSlinfeng
perf_event_open( attr: &perf_event_attr, pid: i32, cpu: i32, group_fd: i32, flags: u32, ) -> Result<usize>262*fae6e9adSlinfeng pub fn perf_event_open(
263*fae6e9adSlinfeng attr: &perf_event_attr,
264*fae6e9adSlinfeng pid: i32,
265*fae6e9adSlinfeng cpu: i32,
266*fae6e9adSlinfeng group_fd: i32,
267*fae6e9adSlinfeng flags: u32,
268*fae6e9adSlinfeng ) -> Result<usize> {
269*fae6e9adSlinfeng let args = PerfProbeArgs::try_from(attr, pid, cpu, group_fd, flags)?;
270*fae6e9adSlinfeng log::info!("perf_event_process: {:#?}", args);
271*fae6e9adSlinfeng let file_mode = if args
272*fae6e9adSlinfeng .flags
273*fae6e9adSlinfeng .contains(PerfEventOpenFlags::PERF_FLAG_FD_CLOEXEC)
274*fae6e9adSlinfeng {
275*fae6e9adSlinfeng FileMode::O_RDWR | FileMode::O_CLOEXEC
276*fae6e9adSlinfeng } else {
277*fae6e9adSlinfeng FileMode::O_RDWR
278*fae6e9adSlinfeng };
279*fae6e9adSlinfeng
280*fae6e9adSlinfeng let event: Box<dyn PerfEventOps> = match args.type_ {
281*fae6e9adSlinfeng // Kprobe
282*fae6e9adSlinfeng // See /sys/bus/event_source/devices/kprobe/type
283*fae6e9adSlinfeng perf_type_id::PERF_TYPE_MAX => {
284*fae6e9adSlinfeng let kprobe_event = kprobe::perf_event_open_kprobe(args);
285*fae6e9adSlinfeng Box::new(kprobe_event)
286*fae6e9adSlinfeng }
287*fae6e9adSlinfeng perf_type_id::PERF_TYPE_SOFTWARE => {
288*fae6e9adSlinfeng // For bpf prog output
289*fae6e9adSlinfeng assert_eq!(args.config, perf_sw_ids::PERF_COUNT_SW_BPF_OUTPUT);
290*fae6e9adSlinfeng assert_eq!(
291*fae6e9adSlinfeng args.sample_type,
292*fae6e9adSlinfeng Some(perf_event_sample_format::PERF_SAMPLE_RAW)
293*fae6e9adSlinfeng );
294*fae6e9adSlinfeng let bpf_event = bpf::perf_event_open_bpf(args);
295*fae6e9adSlinfeng Box::new(bpf_event)
296*fae6e9adSlinfeng }
297*fae6e9adSlinfeng _ => {
298*fae6e9adSlinfeng unimplemented!("perf_event_process: unknown type: {:?}", args);
299*fae6e9adSlinfeng }
300*fae6e9adSlinfeng };
301*fae6e9adSlinfeng
302*fae6e9adSlinfeng let page_cache = event.page_cache();
303*fae6e9adSlinfeng let perf_event = Arc::new(PerfEventInode::new(event));
304*fae6e9adSlinfeng if let Some(cache) = page_cache {
305*fae6e9adSlinfeng cache.set_inode(Arc::downgrade(&(perf_event.clone() as _)))?;
306*fae6e9adSlinfeng }
307*fae6e9adSlinfeng let file = File::new(perf_event, file_mode)?;
308*fae6e9adSlinfeng let fd_table = ProcessManager::current_pcb().fd_table();
309*fae6e9adSlinfeng let fd = fd_table.write().alloc_fd(file, None).map(|x| x as usize)?;
310*fae6e9adSlinfeng Ok(fd)
311*fae6e9adSlinfeng }
312*fae6e9adSlinfeng
perf_event_output(_ctx: *mut c_void, fd: usize, _flags: u32, data: &[u8]) -> Result<()>313*fae6e9adSlinfeng pub fn perf_event_output(_ctx: *mut c_void, fd: usize, _flags: u32, data: &[u8]) -> Result<()> {
314*fae6e9adSlinfeng let file = get_perf_event_file(fd)?;
315*fae6e9adSlinfeng let bpf_event_file = file.deref().deref();
316*fae6e9adSlinfeng let bpf_event_file = bpf_event_file
317*fae6e9adSlinfeng .deref()
318*fae6e9adSlinfeng .ref_any()
319*fae6e9adSlinfeng .downcast_ref::<BpfPerfEvent>()
320*fae6e9adSlinfeng .ok_or(SystemError::EINVAL)?;
321*fae6e9adSlinfeng bpf_event_file.write_event(data)?;
322*fae6e9adSlinfeng file.epoll_callback()?;
323*fae6e9adSlinfeng Ok(())
324*fae6e9adSlinfeng }
325*fae6e9adSlinfeng
get_perf_event_file(fd: usize) -> Result<Arc<PerfEventInode>>326*fae6e9adSlinfeng fn get_perf_event_file(fd: usize) -> Result<Arc<PerfEventInode>> {
327*fae6e9adSlinfeng let fd_table = ProcessManager::current_pcb().fd_table();
328*fae6e9adSlinfeng let file = fd_table
329*fae6e9adSlinfeng .read()
330*fae6e9adSlinfeng .get_file_by_fd(fd as _)
331*fae6e9adSlinfeng .ok_or(SystemError::EBADF)?;
332*fae6e9adSlinfeng let event = file
333*fae6e9adSlinfeng .inode()
334*fae6e9adSlinfeng .downcast_arc::<PerfEventInode>()
335*fae6e9adSlinfeng .ok_or(SystemError::EINVAL)?;
336*fae6e9adSlinfeng Ok(event)
337*fae6e9adSlinfeng }
338