xref: /DragonOS/kernel/src/perf/bpf.rs (revision fae6e9ade46a52976ad5d099643d51cc20876448)
1 use super::{PerfEventOps, Result};
2 use crate::arch::mm::LockedFrameAllocator;
3 use crate::arch::MMArch;
4 use crate::filesystem::vfs::file::PageCache;
5 use crate::filesystem::vfs::{FilePrivateData, FileSystem, IndexNode};
6 use crate::include::bindings::linux_bpf::{
7     perf_event_header, perf_event_mmap_page, perf_event_type,
8 };
9 use crate::libs::spinlock::{SpinLock, SpinLockGuard};
10 use crate::mm::allocator::page_frame::{FrameAllocator, PageFrameCount, PhysPageFrame};
11 use crate::mm::page::{page_manager_lock_irqsave, Page};
12 use crate::mm::{MemoryManagementArch, PhysAddr};
13 use crate::perf::util::{LostSamples, PerfProbeArgs, PerfSample, SampleHeader};
14 use alloc::string::String;
15 use alloc::sync::Arc;
16 use alloc::vec::Vec;
17 use core::any::Any;
18 use core::fmt::Debug;
19 use system_error::SystemError;
20 const PAGE_SIZE: usize = MMArch::PAGE_SIZE;
/// A perf event whose samples are delivered to user space through a
/// BPF-style mmap'ed ring buffer (see [`RingPage`]).
#[derive(Debug)]
pub struct BpfPerfEvent {
    // Arguments this event was opened with; kept for bookkeeping only.
    _args: PerfProbeArgs,
    // Mutable event state (ring buffer, page cache, enable flag),
    // protected by a spinlock.
    data: SpinLock<BpfPerfEventData>,
}
26 
/// Lock-protected interior state of a [`BpfPerfEvent`].
#[derive(Debug)]
pub struct BpfPerfEventData {
    // Whether the event is currently enabled (set via `PerfEventOps`).
    enabled: bool,
    // The mmap'ed ring buffer; `RingPage::empty()` until `do_mmap` runs.
    mmap_page: RingPage,
    // Page cache holding the ring buffer's backing pages for the VFS mmap path.
    page_cache: Arc<PageCache>,
    // The mmap offset requested by user space (recorded in `do_mmap`).
    offset: usize,
}
34 
/// The ring-buffer mapping shared with user space.
///
/// The first page holds the `perf_event_mmap_page` control structure
/// (data_head/data_tail indices); the remaining pages form the data
/// region that sample and lost records are written into.
#[derive(Debug)]
pub struct RingPage {
    // Total size of the mapping in bytes (control page + data region).
    size: usize,
    // Kernel virtual address of the start of the mapping (0 when empty).
    ptr: usize,
    // Size of the data region in bytes (`size - PAGE_SIZE`).
    data_region_size: usize,
    // Number of records dropped since the last LOST record was flushed.
    lost: usize,
    // Physical address of the first backing frame.
    phys_addr: PhysAddr,
}
43 
44 impl RingPage {
empty() -> Self45     pub fn empty() -> Self {
46         RingPage {
47             ptr: 0,
48             size: 0,
49             data_region_size: 0,
50             lost: 0,
51             phys_addr: PhysAddr::new(0),
52         }
53     }
54 
new_init(start: usize, len: usize, phys_addr: PhysAddr) -> Self55     pub fn new_init(start: usize, len: usize, phys_addr: PhysAddr) -> Self {
56         Self::init(start as _, len, phys_addr)
57     }
58 
init(ptr: *mut u8, size: usize, phys_addr: PhysAddr) -> Self59     fn init(ptr: *mut u8, size: usize, phys_addr: PhysAddr) -> Self {
60         assert_eq!(size % PAGE_SIZE, 0);
61         assert!(size / PAGE_SIZE >= 2);
62         // The first page will be filled with perf_event_mmap_page
63         unsafe {
64             let perf_event_mmap_page = &mut *(ptr as *mut perf_event_mmap_page);
65             perf_event_mmap_page.data_offset = PAGE_SIZE as u64;
66             perf_event_mmap_page.data_size = (size - PAGE_SIZE) as u64;
67             // user will read sample or lost record from data_tail
68             perf_event_mmap_page.data_tail = 0;
69             // kernel will write sample or lost record from data_head
70             perf_event_mmap_page.data_head = 0;
71             // It is a ring buffer.
72         }
73         RingPage {
74             ptr: ptr as usize,
75             size,
76             data_region_size: size - PAGE_SIZE,
77             lost: 0,
78             phys_addr,
79         }
80     }
81 
can_write(&self, data_size: usize, data_tail: usize, data_head: usize) -> bool82     fn can_write(&self, data_size: usize, data_tail: usize, data_head: usize) -> bool {
83         if (data_head + 1) % self.data_region_size == data_tail {
84             // The buffer is full
85             return false;
86         }
87         let capacity = if data_head >= data_tail {
88             self.data_region_size - data_head + data_tail
89         } else {
90             data_tail - data_head
91         };
92         data_size <= capacity
93     }
94 
write_event(&mut self, data: &[u8]) -> Result<()>95     pub fn write_event(&mut self, data: &[u8]) -> Result<()> {
96         let data_tail = unsafe { &mut (*(self.ptr as *mut perf_event_mmap_page)).data_tail };
97         let data_head = unsafe { &mut (*(self.ptr as *mut perf_event_mmap_page)).data_head };
98         // data_tail..data_head is the region that can be written
99         // check if there is enough space to write the event
100         let sample_size = PerfSample::calculate_size(data.len());
101 
102         let can_write_sample =
103             self.can_write(sample_size, *data_tail as usize, *data_head as usize);
104         // log::error!(
105         //     "can_write_sample: {}, data_tail: {}, data_head: {}, data.len(): {}, region_size: {}",
106         //     can_write_sample,
107         //     *data_tail,
108         //     *data_head,
109         //     data.len(),
110         //     self.data_region_size
111         // );
112         if !can_write_sample {
113             //we need record it to the lost record
114             self.lost += 1;
115             // log::error!(
116             //     "Lost record: {}, data_tail: {}, data_head: {}",
117             //     self.lost,
118             //     *data_tail,
119             //     *data_head
120             // );
121             Ok(())
122         } else {
123             // we can write the sample to the page
124             // If the lost record is not zero, we need to write the lost record first.
125             let can_write_lost_record = self.can_write(
126                 size_of::<LostSamples>(),
127                 *data_tail as usize,
128                 *data_head as usize,
129             );
130             if self.lost > 0 && can_write_lost_record {
131                 let new_data_head = self.write_lost(*data_head as usize)?;
132                 *data_head = new_data_head as u64;
133                 // log::info!(
134                 //     "Write lost record: {}, data_tail: {}, new_data_head: {}",
135                 //     self.lost,
136                 //     *data_tail,
137                 //     *data_head
138                 // );
139                 self.lost = 0;
140                 self.write_event(data)
141             } else {
142                 let new_data_head = self.write_sample(data, *data_head as usize)?;
143                 *data_head = new_data_head as u64;
144                 // log::info!(
145                 //     "Write sample record, data_tail: {}, new_data_head: {}",
146                 //     *data_tail,
147                 //     *data_head
148                 // );
149                 Ok(())
150             }
151         }
152     }
153 
154     /// Write any data to the page.
155     ///
156     /// Return the new data_head
write_any(&mut self, data: &[u8], data_head: usize) -> Result<usize>157     fn write_any(&mut self, data: &[u8], data_head: usize) -> Result<usize> {
158         let data_region_len = self.data_region_size;
159         let data_region = self.as_mut_slice()[PAGE_SIZE..].as_mut();
160         let data_len = data.len();
161         let end = (data_head + data_len) % data_region_len;
162         let start = data_head;
163         if start < end {
164             data_region[start..end].copy_from_slice(data);
165         } else {
166             let first_len = data_region_len - start;
167             data_region[start..start + first_len].copy_from_slice(&data[..first_len]);
168             data_region[0..end].copy_from_slice(&data[first_len..]);
169         }
170         Ok(end)
171     }
172 
173     /// Write a sample to the page.
write_sample(&mut self, data: &[u8], data_head: usize) -> Result<usize>174     fn write_sample(&mut self, data: &[u8], data_head: usize) -> Result<usize> {
175         let perf_sample = PerfSample {
176             s_hdr: SampleHeader {
177                 header: perf_event_header {
178                     type_: perf_event_type::PERF_RECORD_SAMPLE as u32,
179                     misc: 0,
180                     size: size_of::<SampleHeader>() as u16 + data.len() as u16,
181                 },
182                 size: data.len() as u32,
183             },
184             value: data,
185         };
186         let new_head = self.write_any(perf_sample.s_hdr.as_bytes(), data_head)?;
187         self.write_any(perf_sample.value, new_head)
188     }
189 
190     /// Write a lost record to the page.
191     ///
192     /// Return the new data_head
write_lost(&mut self, data_head: usize) -> Result<usize>193     fn write_lost(&mut self, data_head: usize) -> Result<usize> {
194         let lost = LostSamples {
195             header: perf_event_header {
196                 type_: perf_event_type::PERF_RECORD_LOST as u32,
197                 misc: 0,
198                 size: size_of::<LostSamples>() as u16,
199             },
200             id: 0,
201             count: self.lost as u64,
202         };
203         self.write_any(lost.as_bytes(), data_head)
204     }
205 
readable(&self) -> bool206     pub fn readable(&self) -> bool {
207         let data_tail = unsafe { &(*(self.ptr as *mut perf_event_mmap_page)).data_tail };
208         let data_head = unsafe { &(*(self.ptr as *mut perf_event_mmap_page)).data_head };
209         data_tail != data_head
210     }
as_slice(&self) -> &[u8]211     pub fn as_slice(&self) -> &[u8] {
212         unsafe { core::slice::from_raw_parts(self.ptr as *const u8, self.size) }
213     }
as_mut_slice(&mut self) -> &mut [u8]214     pub fn as_mut_slice(&mut self) -> &mut [u8] {
215         unsafe { core::slice::from_raw_parts_mut(self.ptr as *mut u8, self.size) }
216     }
217 }
218 
219 impl BpfPerfEvent {
new(args: PerfProbeArgs) -> Self220     pub fn new(args: PerfProbeArgs) -> Self {
221         BpfPerfEvent {
222             _args: args,
223             data: SpinLock::new(BpfPerfEventData {
224                 enabled: false,
225                 mmap_page: RingPage::empty(),
226                 page_cache: PageCache::new(None),
227                 offset: 0,
228             }),
229         }
230     }
do_mmap(&self, _start: usize, len: usize, offset: usize) -> Result<()>231     pub fn do_mmap(&self, _start: usize, len: usize, offset: usize) -> Result<()> {
232         let mut data = self.data.lock();
233         // alloc page frame
234         let (phy_addr, page_count) =
235             unsafe { LockedFrameAllocator.allocate(PageFrameCount::new(len / PAGE_SIZE)) }
236                 .ok_or(SystemError::ENOSPC)?;
237         let mut page_manager_guard = page_manager_lock_irqsave();
238         let mut cur_phys = PhysPageFrame::new(phy_addr);
239         for i in 0..page_count.data() {
240             let page = Arc::new(Page::new(true, cur_phys.phys_address()));
241             let paddr = cur_phys.phys_address();
242             page_manager_guard.insert(paddr, &page);
243             data.page_cache.add_page(i, &page);
244             cur_phys = cur_phys.next();
245         }
246         let virt_addr = unsafe { MMArch::phys_2_virt(phy_addr) }.ok_or(SystemError::EFAULT)?;
247         // create mmap page
248         let mmap_page = RingPage::new_init(virt_addr.data(), len, phy_addr);
249         data.mmap_page = mmap_page;
250         data.offset = offset;
251         Ok(())
252     }
253 
write_event(&self, data: &[u8]) -> Result<()>254     pub fn write_event(&self, data: &[u8]) -> Result<()> {
255         let mut inner_data = self.data.lock();
256         inner_data.mmap_page.write_event(data)?;
257         Ok(())
258     }
259 }
260 
261 impl Drop for BpfPerfEvent {
drop(&mut self)262     fn drop(&mut self) {
263         let mut page_manager_guard = page_manager_lock_irqsave();
264         let data = self.data.lock();
265         let phy_addr = data.mmap_page.phys_addr;
266         let len = data.mmap_page.size;
267         let page_count = PageFrameCount::new(len / PAGE_SIZE);
268         let mut cur_phys = PhysPageFrame::new(phy_addr);
269         for _ in 0..page_count.data() {
270             page_manager_guard.remove_page(&cur_phys.phys_address());
271             cur_phys = cur_phys.next();
272         }
273     }
274 }
275 
276 impl IndexNode for BpfPerfEvent {
mmap(&self, start: usize, len: usize, offset: usize) -> Result<()>277     fn mmap(&self, start: usize, len: usize, offset: usize) -> Result<()> {
278         self.do_mmap(start, len, offset)
279     }
280 
read_at( &self, _offset: usize, _len: usize, _buf: &mut [u8], _data: SpinLockGuard<FilePrivateData>, ) -> Result<usize>281     fn read_at(
282         &self,
283         _offset: usize,
284         _len: usize,
285         _buf: &mut [u8],
286         _data: SpinLockGuard<FilePrivateData>,
287     ) -> Result<usize> {
288         panic!("PerfEventInode does not support read")
289     }
290 
write_at( &self, _offset: usize, _len: usize, _buf: &[u8], _data: SpinLockGuard<FilePrivateData>, ) -> Result<usize>291     fn write_at(
292         &self,
293         _offset: usize,
294         _len: usize,
295         _buf: &[u8],
296         _data: SpinLockGuard<FilePrivateData>,
297     ) -> Result<usize> {
298         panic!("PerfEventInode does not support write")
299     }
300 
fs(&self) -> Arc<dyn FileSystem>301     fn fs(&self) -> Arc<dyn FileSystem> {
302         panic!("PerfEventInode does not have a filesystem")
303     }
304 
as_any_ref(&self) -> &dyn Any305     fn as_any_ref(&self) -> &dyn Any {
306         self
307     }
list(&self) -> Result<Vec<String>>308     fn list(&self) -> Result<Vec<String>> {
309         Err(SystemError::ENOSYS)
310     }
311 
page_cache(&self) -> Option<Arc<PageCache>>312     fn page_cache(&self) -> Option<Arc<PageCache>> {
313         Some(self.data.lock().page_cache.clone())
314     }
315 }
316 
317 impl PerfEventOps for BpfPerfEvent {
enable(&self) -> Result<()>318     fn enable(&self) -> Result<()> {
319         self.data.lock().enabled = true;
320         Ok(())
321     }
disable(&self) -> Result<()>322     fn disable(&self) -> Result<()> {
323         self.data.lock().enabled = false;
324         Ok(())
325     }
readable(&self) -> bool326     fn readable(&self) -> bool {
327         self.data.lock().mmap_page.readable()
328     }
329 }
330 
/// Create a BPF-backed perf event from parsed `perf_event_open` arguments.
///
/// The returned event is disabled and has no ring buffer until user space
/// mmaps it (which triggers [`BpfPerfEvent::do_mmap`]).
pub fn perf_event_open_bpf(args: PerfProbeArgs) -> BpfPerfEvent {
    BpfPerfEvent::new(args)
}
334