xref: /DragonOS/kernel/src/perf/bpf.rs (revision 7c28051e8c601312d3d0fd7bcb71bc71450d10c0)
1fae6e9adSlinfeng use super::{PerfEventOps, Result};
2fae6e9adSlinfeng use crate::arch::mm::LockedFrameAllocator;
3fae6e9adSlinfeng use crate::arch::MMArch;
4fae6e9adSlinfeng use crate::filesystem::vfs::file::PageCache;
5fae6e9adSlinfeng use crate::filesystem::vfs::{FilePrivateData, FileSystem, IndexNode};
6fae6e9adSlinfeng use crate::include::bindings::linux_bpf::{
7fae6e9adSlinfeng     perf_event_header, perf_event_mmap_page, perf_event_type,
8fae6e9adSlinfeng };
9fae6e9adSlinfeng use crate::libs::spinlock::{SpinLock, SpinLockGuard};
10fae6e9adSlinfeng use crate::mm::allocator::page_frame::{FrameAllocator, PageFrameCount, PhysPageFrame};
11fae6e9adSlinfeng use crate::mm::page::{page_manager_lock_irqsave, Page};
12fae6e9adSlinfeng use crate::mm::{MemoryManagementArch, PhysAddr};
13fae6e9adSlinfeng use crate::perf::util::{LostSamples, PerfProbeArgs, PerfSample, SampleHeader};
14fae6e9adSlinfeng use alloc::string::String;
15fae6e9adSlinfeng use alloc::sync::Arc;
16fae6e9adSlinfeng use alloc::vec::Vec;
17fae6e9adSlinfeng use core::any::Any;
18fae6e9adSlinfeng use core::fmt::Debug;
19fae6e9adSlinfeng use system_error::SystemError;
/// Page size of the current architecture, in bytes.
const PAGE_SIZE: usize = MMArch::PAGE_SIZE;
/// A perf event backed by a ring buffer that user space consumes via `mmap`.
#[derive(Debug)]
pub struct BpfPerfEvent {
    /// Arguments the event was opened with (kept for bookkeeping only).
    _args: PerfProbeArgs,
    /// Mutable state (ring page, page cache, enabled flag) behind a spinlock.
    data: SpinLock<BpfPerfEventData>,
}
26fae6e9adSlinfeng 
#[derive(Debug)]
pub struct BpfPerfEventData {
    /// Whether the event is currently enabled (toggled via `PerfEventOps`).
    enabled: bool,
    /// The ring buffer shared with user space through `mmap`.
    mmap_page: RingPage,
    /// Page cache holding the physical frames that back the ring buffer.
    page_cache: Arc<PageCache>,
    /// The `offset` argument passed to the most recent `mmap` call.
    offset: usize,
}
34fae6e9adSlinfeng 
/// The kernel side of a perf mmap ring buffer.
///
/// The first page holds a `perf_event_mmap_page` header; the remaining
/// `data_region_size` bytes form the ring of sample/lost records.
#[derive(Debug)]
pub struct RingPage {
    /// Total size of the mapping in bytes (header page + data region).
    size: usize,
    /// Kernel virtual address of the mapping (the header page lives here).
    ptr: usize,
    /// Size of the data region in bytes (`size - PAGE_SIZE`).
    data_region_size: usize,
    /// Number of records dropped since the last lost-record was emitted.
    lost: usize,
    /// Physical address of the first backing frame.
    phys_addr: PhysAddr,
}
43fae6e9adSlinfeng 
44fae6e9adSlinfeng impl RingPage {
empty() -> Self45fae6e9adSlinfeng     pub fn empty() -> Self {
46fae6e9adSlinfeng         RingPage {
47fae6e9adSlinfeng             ptr: 0,
48fae6e9adSlinfeng             size: 0,
49fae6e9adSlinfeng             data_region_size: 0,
50fae6e9adSlinfeng             lost: 0,
51fae6e9adSlinfeng             phys_addr: PhysAddr::new(0),
52fae6e9adSlinfeng         }
53fae6e9adSlinfeng     }
54fae6e9adSlinfeng 
new_init(start: usize, len: usize, phys_addr: PhysAddr) -> Self55fae6e9adSlinfeng     pub fn new_init(start: usize, len: usize, phys_addr: PhysAddr) -> Self {
56fae6e9adSlinfeng         Self::init(start as _, len, phys_addr)
57fae6e9adSlinfeng     }
58fae6e9adSlinfeng 
init(ptr: *mut u8, size: usize, phys_addr: PhysAddr) -> Self59fae6e9adSlinfeng     fn init(ptr: *mut u8, size: usize, phys_addr: PhysAddr) -> Self {
60fae6e9adSlinfeng         assert_eq!(size % PAGE_SIZE, 0);
61fae6e9adSlinfeng         assert!(size / PAGE_SIZE >= 2);
62fae6e9adSlinfeng         // The first page will be filled with perf_event_mmap_page
63fae6e9adSlinfeng         unsafe {
64fae6e9adSlinfeng             let perf_event_mmap_page = &mut *(ptr as *mut perf_event_mmap_page);
65fae6e9adSlinfeng             perf_event_mmap_page.data_offset = PAGE_SIZE as u64;
66fae6e9adSlinfeng             perf_event_mmap_page.data_size = (size - PAGE_SIZE) as u64;
67fae6e9adSlinfeng             // user will read sample or lost record from data_tail
68fae6e9adSlinfeng             perf_event_mmap_page.data_tail = 0;
69fae6e9adSlinfeng             // kernel will write sample or lost record from data_head
70fae6e9adSlinfeng             perf_event_mmap_page.data_head = 0;
71fae6e9adSlinfeng             // It is a ring buffer.
72fae6e9adSlinfeng         }
73fae6e9adSlinfeng         RingPage {
74fae6e9adSlinfeng             ptr: ptr as usize,
75fae6e9adSlinfeng             size,
76fae6e9adSlinfeng             data_region_size: size - PAGE_SIZE,
77fae6e9adSlinfeng             lost: 0,
78fae6e9adSlinfeng             phys_addr,
79fae6e9adSlinfeng         }
80fae6e9adSlinfeng     }
81fae6e9adSlinfeng 
can_write(&self, data_size: usize, data_tail: usize, data_head: usize) -> bool82fae6e9adSlinfeng     fn can_write(&self, data_size: usize, data_tail: usize, data_head: usize) -> bool {
83fae6e9adSlinfeng         if (data_head + 1) % self.data_region_size == data_tail {
84fae6e9adSlinfeng             // The buffer is full
85fae6e9adSlinfeng             return false;
86fae6e9adSlinfeng         }
87fae6e9adSlinfeng         let capacity = if data_head >= data_tail {
88fae6e9adSlinfeng             self.data_region_size - data_head + data_tail
89fae6e9adSlinfeng         } else {
90fae6e9adSlinfeng             data_tail - data_head
91fae6e9adSlinfeng         };
92fae6e9adSlinfeng         data_size <= capacity
93fae6e9adSlinfeng     }
94fae6e9adSlinfeng 
write_event(&mut self, data: &[u8]) -> Result<()>95fae6e9adSlinfeng     pub fn write_event(&mut self, data: &[u8]) -> Result<()> {
96fae6e9adSlinfeng         let data_tail = unsafe { &mut (*(self.ptr as *mut perf_event_mmap_page)).data_tail };
97fae6e9adSlinfeng         let data_head = unsafe { &mut (*(self.ptr as *mut perf_event_mmap_page)).data_head };
98fae6e9adSlinfeng         // data_tail..data_head is the region that can be written
99fae6e9adSlinfeng         // check if there is enough space to write the event
100fae6e9adSlinfeng         let sample_size = PerfSample::calculate_size(data.len());
101fae6e9adSlinfeng 
102fae6e9adSlinfeng         let can_write_sample =
103fae6e9adSlinfeng             self.can_write(sample_size, *data_tail as usize, *data_head as usize);
104fae6e9adSlinfeng         // log::error!(
105fae6e9adSlinfeng         //     "can_write_sample: {}, data_tail: {}, data_head: {}, data.len(): {}, region_size: {}",
106fae6e9adSlinfeng         //     can_write_sample,
107fae6e9adSlinfeng         //     *data_tail,
108fae6e9adSlinfeng         //     *data_head,
109fae6e9adSlinfeng         //     data.len(),
110fae6e9adSlinfeng         //     self.data_region_size
111fae6e9adSlinfeng         // );
112fae6e9adSlinfeng         if !can_write_sample {
113fae6e9adSlinfeng             //we need record it to the lost record
114fae6e9adSlinfeng             self.lost += 1;
115fae6e9adSlinfeng             // log::error!(
116fae6e9adSlinfeng             //     "Lost record: {}, data_tail: {}, data_head: {}",
117fae6e9adSlinfeng             //     self.lost,
118fae6e9adSlinfeng             //     *data_tail,
119fae6e9adSlinfeng             //     *data_head
120fae6e9adSlinfeng             // );
121fae6e9adSlinfeng             Ok(())
122fae6e9adSlinfeng         } else {
123fae6e9adSlinfeng             // we can write the sample to the page
124fae6e9adSlinfeng             // If the lost record is not zero, we need to write the lost record first.
125fae6e9adSlinfeng             let can_write_lost_record = self.can_write(
126fae6e9adSlinfeng                 size_of::<LostSamples>(),
127fae6e9adSlinfeng                 *data_tail as usize,
128fae6e9adSlinfeng                 *data_head as usize,
129fae6e9adSlinfeng             );
130fae6e9adSlinfeng             if self.lost > 0 && can_write_lost_record {
131fae6e9adSlinfeng                 let new_data_head = self.write_lost(*data_head as usize)?;
132fae6e9adSlinfeng                 *data_head = new_data_head as u64;
133fae6e9adSlinfeng                 // log::info!(
134fae6e9adSlinfeng                 //     "Write lost record: {}, data_tail: {}, new_data_head: {}",
135fae6e9adSlinfeng                 //     self.lost,
136fae6e9adSlinfeng                 //     *data_tail,
137fae6e9adSlinfeng                 //     *data_head
138fae6e9adSlinfeng                 // );
139fae6e9adSlinfeng                 self.lost = 0;
140fae6e9adSlinfeng                 self.write_event(data)
141fae6e9adSlinfeng             } else {
142fae6e9adSlinfeng                 let new_data_head = self.write_sample(data, *data_head as usize)?;
143fae6e9adSlinfeng                 *data_head = new_data_head as u64;
144fae6e9adSlinfeng                 // log::info!(
145fae6e9adSlinfeng                 //     "Write sample record, data_tail: {}, new_data_head: {}",
146fae6e9adSlinfeng                 //     *data_tail,
147fae6e9adSlinfeng                 //     *data_head
148fae6e9adSlinfeng                 // );
149fae6e9adSlinfeng                 Ok(())
150fae6e9adSlinfeng             }
151fae6e9adSlinfeng         }
152fae6e9adSlinfeng     }
153fae6e9adSlinfeng 
154fae6e9adSlinfeng     /// Write any data to the page.
155fae6e9adSlinfeng     ///
156fae6e9adSlinfeng     /// Return the new data_head
write_any(&mut self, data: &[u8], data_head: usize) -> Result<usize>157fae6e9adSlinfeng     fn write_any(&mut self, data: &[u8], data_head: usize) -> Result<usize> {
158fae6e9adSlinfeng         let data_region_len = self.data_region_size;
159fae6e9adSlinfeng         let data_region = self.as_mut_slice()[PAGE_SIZE..].as_mut();
160fae6e9adSlinfeng         let data_len = data.len();
161fae6e9adSlinfeng         let end = (data_head + data_len) % data_region_len;
162fae6e9adSlinfeng         let start = data_head;
163fae6e9adSlinfeng         if start < end {
164fae6e9adSlinfeng             data_region[start..end].copy_from_slice(data);
165fae6e9adSlinfeng         } else {
166fae6e9adSlinfeng             let first_len = data_region_len - start;
167fae6e9adSlinfeng             data_region[start..start + first_len].copy_from_slice(&data[..first_len]);
168fae6e9adSlinfeng             data_region[0..end].copy_from_slice(&data[first_len..]);
169fae6e9adSlinfeng         }
170fae6e9adSlinfeng         Ok(end)
171fae6e9adSlinfeng     }
172fae6e9adSlinfeng 
173fae6e9adSlinfeng     /// Write a sample to the page.
write_sample(&mut self, data: &[u8], data_head: usize) -> Result<usize>174fae6e9adSlinfeng     fn write_sample(&mut self, data: &[u8], data_head: usize) -> Result<usize> {
175fae6e9adSlinfeng         let perf_sample = PerfSample {
176fae6e9adSlinfeng             s_hdr: SampleHeader {
177fae6e9adSlinfeng                 header: perf_event_header {
178fae6e9adSlinfeng                     type_: perf_event_type::PERF_RECORD_SAMPLE as u32,
179fae6e9adSlinfeng                     misc: 0,
180fae6e9adSlinfeng                     size: size_of::<SampleHeader>() as u16 + data.len() as u16,
181fae6e9adSlinfeng                 },
182fae6e9adSlinfeng                 size: data.len() as u32,
183fae6e9adSlinfeng             },
184fae6e9adSlinfeng             value: data,
185fae6e9adSlinfeng         };
186fae6e9adSlinfeng         let new_head = self.write_any(perf_sample.s_hdr.as_bytes(), data_head)?;
187fae6e9adSlinfeng         self.write_any(perf_sample.value, new_head)
188fae6e9adSlinfeng     }
189fae6e9adSlinfeng 
190fae6e9adSlinfeng     /// Write a lost record to the page.
191fae6e9adSlinfeng     ///
192fae6e9adSlinfeng     /// Return the new data_head
write_lost(&mut self, data_head: usize) -> Result<usize>193fae6e9adSlinfeng     fn write_lost(&mut self, data_head: usize) -> Result<usize> {
194fae6e9adSlinfeng         let lost = LostSamples {
195fae6e9adSlinfeng             header: perf_event_header {
196fae6e9adSlinfeng                 type_: perf_event_type::PERF_RECORD_LOST as u32,
197fae6e9adSlinfeng                 misc: 0,
198fae6e9adSlinfeng                 size: size_of::<LostSamples>() as u16,
199fae6e9adSlinfeng             },
200fae6e9adSlinfeng             id: 0,
201fae6e9adSlinfeng             count: self.lost as u64,
202fae6e9adSlinfeng         };
203fae6e9adSlinfeng         self.write_any(lost.as_bytes(), data_head)
204fae6e9adSlinfeng     }
205fae6e9adSlinfeng 
readable(&self) -> bool206fae6e9adSlinfeng     pub fn readable(&self) -> bool {
207fae6e9adSlinfeng         let data_tail = unsafe { &(*(self.ptr as *mut perf_event_mmap_page)).data_tail };
208fae6e9adSlinfeng         let data_head = unsafe { &(*(self.ptr as *mut perf_event_mmap_page)).data_head };
209fae6e9adSlinfeng         data_tail != data_head
210fae6e9adSlinfeng     }
211*7c28051eSlinfeng 
212*7c28051eSlinfeng     #[allow(dead_code)]
as_slice(&self) -> &[u8]213fae6e9adSlinfeng     pub fn as_slice(&self) -> &[u8] {
214fae6e9adSlinfeng         unsafe { core::slice::from_raw_parts(self.ptr as *const u8, self.size) }
215fae6e9adSlinfeng     }
as_mut_slice(&mut self) -> &mut [u8]216fae6e9adSlinfeng     pub fn as_mut_slice(&mut self) -> &mut [u8] {
217fae6e9adSlinfeng         unsafe { core::slice::from_raw_parts_mut(self.ptr as *mut u8, self.size) }
218fae6e9adSlinfeng     }
219fae6e9adSlinfeng }
220fae6e9adSlinfeng 
221fae6e9adSlinfeng impl BpfPerfEvent {
new(args: PerfProbeArgs) -> Self222fae6e9adSlinfeng     pub fn new(args: PerfProbeArgs) -> Self {
223fae6e9adSlinfeng         BpfPerfEvent {
224fae6e9adSlinfeng             _args: args,
225fae6e9adSlinfeng             data: SpinLock::new(BpfPerfEventData {
226fae6e9adSlinfeng                 enabled: false,
227fae6e9adSlinfeng                 mmap_page: RingPage::empty(),
228fae6e9adSlinfeng                 page_cache: PageCache::new(None),
229fae6e9adSlinfeng                 offset: 0,
230fae6e9adSlinfeng             }),
231fae6e9adSlinfeng         }
232fae6e9adSlinfeng     }
do_mmap(&self, _start: usize, len: usize, offset: usize) -> Result<()>233fae6e9adSlinfeng     pub fn do_mmap(&self, _start: usize, len: usize, offset: usize) -> Result<()> {
234fae6e9adSlinfeng         let mut data = self.data.lock();
235fae6e9adSlinfeng         // alloc page frame
236fae6e9adSlinfeng         let (phy_addr, page_count) =
237fae6e9adSlinfeng             unsafe { LockedFrameAllocator.allocate(PageFrameCount::new(len / PAGE_SIZE)) }
238fae6e9adSlinfeng                 .ok_or(SystemError::ENOSPC)?;
239fae6e9adSlinfeng         let mut page_manager_guard = page_manager_lock_irqsave();
240fae6e9adSlinfeng         let mut cur_phys = PhysPageFrame::new(phy_addr);
241fae6e9adSlinfeng         for i in 0..page_count.data() {
242fae6e9adSlinfeng             let page = Arc::new(Page::new(true, cur_phys.phys_address()));
243fae6e9adSlinfeng             let paddr = cur_phys.phys_address();
244fae6e9adSlinfeng             page_manager_guard.insert(paddr, &page);
245fae6e9adSlinfeng             data.page_cache.add_page(i, &page);
246fae6e9adSlinfeng             cur_phys = cur_phys.next();
247fae6e9adSlinfeng         }
248fae6e9adSlinfeng         let virt_addr = unsafe { MMArch::phys_2_virt(phy_addr) }.ok_or(SystemError::EFAULT)?;
249fae6e9adSlinfeng         // create mmap page
250fae6e9adSlinfeng         let mmap_page = RingPage::new_init(virt_addr.data(), len, phy_addr);
251fae6e9adSlinfeng         data.mmap_page = mmap_page;
252fae6e9adSlinfeng         data.offset = offset;
253fae6e9adSlinfeng         Ok(())
254fae6e9adSlinfeng     }
255fae6e9adSlinfeng 
write_event(&self, data: &[u8]) -> Result<()>256fae6e9adSlinfeng     pub fn write_event(&self, data: &[u8]) -> Result<()> {
257fae6e9adSlinfeng         let mut inner_data = self.data.lock();
258fae6e9adSlinfeng         inner_data.mmap_page.write_event(data)?;
259fae6e9adSlinfeng         Ok(())
260fae6e9adSlinfeng     }
261fae6e9adSlinfeng }
262fae6e9adSlinfeng 
263fae6e9adSlinfeng impl Drop for BpfPerfEvent {
drop(&mut self)264fae6e9adSlinfeng     fn drop(&mut self) {
265fae6e9adSlinfeng         let mut page_manager_guard = page_manager_lock_irqsave();
266fae6e9adSlinfeng         let data = self.data.lock();
267fae6e9adSlinfeng         let phy_addr = data.mmap_page.phys_addr;
268fae6e9adSlinfeng         let len = data.mmap_page.size;
269fae6e9adSlinfeng         let page_count = PageFrameCount::new(len / PAGE_SIZE);
270fae6e9adSlinfeng         let mut cur_phys = PhysPageFrame::new(phy_addr);
271fae6e9adSlinfeng         for _ in 0..page_count.data() {
272fae6e9adSlinfeng             page_manager_guard.remove_page(&cur_phys.phys_address());
273fae6e9adSlinfeng             cur_phys = cur_phys.next();
274fae6e9adSlinfeng         }
275fae6e9adSlinfeng     }
276fae6e9adSlinfeng }
277fae6e9adSlinfeng 
278fae6e9adSlinfeng impl IndexNode for BpfPerfEvent {
mmap(&self, start: usize, len: usize, offset: usize) -> Result<()>279fae6e9adSlinfeng     fn mmap(&self, start: usize, len: usize, offset: usize) -> Result<()> {
280fae6e9adSlinfeng         self.do_mmap(start, len, offset)
281fae6e9adSlinfeng     }
282fae6e9adSlinfeng 
read_at( &self, _offset: usize, _len: usize, _buf: &mut [u8], _data: SpinLockGuard<FilePrivateData>, ) -> Result<usize>283fae6e9adSlinfeng     fn read_at(
284fae6e9adSlinfeng         &self,
285fae6e9adSlinfeng         _offset: usize,
286fae6e9adSlinfeng         _len: usize,
287fae6e9adSlinfeng         _buf: &mut [u8],
288fae6e9adSlinfeng         _data: SpinLockGuard<FilePrivateData>,
289fae6e9adSlinfeng     ) -> Result<usize> {
290fae6e9adSlinfeng         panic!("PerfEventInode does not support read")
291fae6e9adSlinfeng     }
292fae6e9adSlinfeng 
write_at( &self, _offset: usize, _len: usize, _buf: &[u8], _data: SpinLockGuard<FilePrivateData>, ) -> Result<usize>293fae6e9adSlinfeng     fn write_at(
294fae6e9adSlinfeng         &self,
295fae6e9adSlinfeng         _offset: usize,
296fae6e9adSlinfeng         _len: usize,
297fae6e9adSlinfeng         _buf: &[u8],
298fae6e9adSlinfeng         _data: SpinLockGuard<FilePrivateData>,
299fae6e9adSlinfeng     ) -> Result<usize> {
300fae6e9adSlinfeng         panic!("PerfEventInode does not support write")
301fae6e9adSlinfeng     }
302fae6e9adSlinfeng 
fs(&self) -> Arc<dyn FileSystem>303fae6e9adSlinfeng     fn fs(&self) -> Arc<dyn FileSystem> {
304fae6e9adSlinfeng         panic!("PerfEventInode does not have a filesystem")
305fae6e9adSlinfeng     }
306fae6e9adSlinfeng 
as_any_ref(&self) -> &dyn Any307fae6e9adSlinfeng     fn as_any_ref(&self) -> &dyn Any {
308fae6e9adSlinfeng         self
309fae6e9adSlinfeng     }
list(&self) -> Result<Vec<String>>310fae6e9adSlinfeng     fn list(&self) -> Result<Vec<String>> {
311fae6e9adSlinfeng         Err(SystemError::ENOSYS)
312fae6e9adSlinfeng     }
313fae6e9adSlinfeng 
page_cache(&self) -> Option<Arc<PageCache>>314fae6e9adSlinfeng     fn page_cache(&self) -> Option<Arc<PageCache>> {
315fae6e9adSlinfeng         Some(self.data.lock().page_cache.clone())
316fae6e9adSlinfeng     }
317fae6e9adSlinfeng }
318fae6e9adSlinfeng 
319fae6e9adSlinfeng impl PerfEventOps for BpfPerfEvent {
enable(&self) -> Result<()>320fae6e9adSlinfeng     fn enable(&self) -> Result<()> {
321fae6e9adSlinfeng         self.data.lock().enabled = true;
322fae6e9adSlinfeng         Ok(())
323fae6e9adSlinfeng     }
disable(&self) -> Result<()>324fae6e9adSlinfeng     fn disable(&self) -> Result<()> {
325fae6e9adSlinfeng         self.data.lock().enabled = false;
326fae6e9adSlinfeng         Ok(())
327fae6e9adSlinfeng     }
readable(&self) -> bool328fae6e9adSlinfeng     fn readable(&self) -> bool {
329fae6e9adSlinfeng         self.data.lock().mmap_page.readable()
330fae6e9adSlinfeng     }
331fae6e9adSlinfeng }
332fae6e9adSlinfeng 
/// Create a [`BpfPerfEvent`] for the given perf-probe arguments.
pub fn perf_event_open_bpf(args: PerfProbeArgs) -> BpfPerfEvent {
    BpfPerfEvent::new(args)
}
336