1fae6e9adSlinfeng use super::{PerfEventOps, Result};
2fae6e9adSlinfeng use crate::arch::mm::LockedFrameAllocator;
3fae6e9adSlinfeng use crate::arch::MMArch;
4fae6e9adSlinfeng use crate::filesystem::vfs::file::PageCache;
5fae6e9adSlinfeng use crate::filesystem::vfs::{FilePrivateData, FileSystem, IndexNode};
6fae6e9adSlinfeng use crate::include::bindings::linux_bpf::{
7fae6e9adSlinfeng perf_event_header, perf_event_mmap_page, perf_event_type,
8fae6e9adSlinfeng };
9fae6e9adSlinfeng use crate::libs::spinlock::{SpinLock, SpinLockGuard};
10fae6e9adSlinfeng use crate::mm::allocator::page_frame::{FrameAllocator, PageFrameCount, PhysPageFrame};
11fae6e9adSlinfeng use crate::mm::page::{page_manager_lock_irqsave, Page};
12fae6e9adSlinfeng use crate::mm::{MemoryManagementArch, PhysAddr};
13fae6e9adSlinfeng use crate::perf::util::{LostSamples, PerfProbeArgs, PerfSample, SampleHeader};
14fae6e9adSlinfeng use alloc::string::String;
15fae6e9adSlinfeng use alloc::sync::Arc;
16fae6e9adSlinfeng use alloc::vec::Vec;
17fae6e9adSlinfeng use core::any::Any;
18fae6e9adSlinfeng use core::fmt::Debug;
19fae6e9adSlinfeng use system_error::SystemError;
20fae6e9adSlinfeng const PAGE_SIZE: usize = MMArch::PAGE_SIZE;
/// A perf event whose samples are delivered through an mmap'ed ring buffer,
/// created via `perf_event_open` for BPF output.
#[derive(Debug)]
pub struct BpfPerfEvent {
    /// Original `perf_event_open` arguments; kept for reference only.
    _args: PerfProbeArgs,
    /// Mutable state (ring page, page cache, enabled flag) behind a spinlock.
    data: SpinLock<BpfPerfEventData>,
}
26fae6e9adSlinfeng
/// Mutable state of a [`BpfPerfEvent`], protected by its spinlock.
#[derive(Debug)]
pub struct BpfPerfEventData {
    /// Whether the event is currently enabled (toggled by `enable`/`disable`).
    enabled: bool,
    /// The ring buffer shared with user space via mmap.
    mmap_page: RingPage,
    /// Page cache holding the frames that back `mmap_page`.
    page_cache: Arc<PageCache>,
    /// mmap offset recorded by `do_mmap`.
    offset: usize,
}
34fae6e9adSlinfeng
/// A perf ring buffer: one control page (`perf_event_mmap_page`) followed by
/// a data region of `size - PAGE_SIZE` bytes.
#[derive(Debug)]
pub struct RingPage {
    /// Total size in bytes (control page + data region); multiple of PAGE_SIZE.
    size: usize,
    /// Kernel virtual address of the first (control) page.
    ptr: usize,
    /// Size of the data region in bytes (`size - PAGE_SIZE`).
    data_region_size: usize,
    /// Number of records dropped since the last LOST record was written out.
    lost: usize,
    /// Physical address of the first page.
    phys_addr: PhysAddr,
}
43fae6e9adSlinfeng
44fae6e9adSlinfeng impl RingPage {
empty() -> Self45fae6e9adSlinfeng pub fn empty() -> Self {
46fae6e9adSlinfeng RingPage {
47fae6e9adSlinfeng ptr: 0,
48fae6e9adSlinfeng size: 0,
49fae6e9adSlinfeng data_region_size: 0,
50fae6e9adSlinfeng lost: 0,
51fae6e9adSlinfeng phys_addr: PhysAddr::new(0),
52fae6e9adSlinfeng }
53fae6e9adSlinfeng }
54fae6e9adSlinfeng
new_init(start: usize, len: usize, phys_addr: PhysAddr) -> Self55fae6e9adSlinfeng pub fn new_init(start: usize, len: usize, phys_addr: PhysAddr) -> Self {
56fae6e9adSlinfeng Self::init(start as _, len, phys_addr)
57fae6e9adSlinfeng }
58fae6e9adSlinfeng
init(ptr: *mut u8, size: usize, phys_addr: PhysAddr) -> Self59fae6e9adSlinfeng fn init(ptr: *mut u8, size: usize, phys_addr: PhysAddr) -> Self {
60fae6e9adSlinfeng assert_eq!(size % PAGE_SIZE, 0);
61fae6e9adSlinfeng assert!(size / PAGE_SIZE >= 2);
62fae6e9adSlinfeng // The first page will be filled with perf_event_mmap_page
63fae6e9adSlinfeng unsafe {
64fae6e9adSlinfeng let perf_event_mmap_page = &mut *(ptr as *mut perf_event_mmap_page);
65fae6e9adSlinfeng perf_event_mmap_page.data_offset = PAGE_SIZE as u64;
66fae6e9adSlinfeng perf_event_mmap_page.data_size = (size - PAGE_SIZE) as u64;
67fae6e9adSlinfeng // user will read sample or lost record from data_tail
68fae6e9adSlinfeng perf_event_mmap_page.data_tail = 0;
69fae6e9adSlinfeng // kernel will write sample or lost record from data_head
70fae6e9adSlinfeng perf_event_mmap_page.data_head = 0;
71fae6e9adSlinfeng // It is a ring buffer.
72fae6e9adSlinfeng }
73fae6e9adSlinfeng RingPage {
74fae6e9adSlinfeng ptr: ptr as usize,
75fae6e9adSlinfeng size,
76fae6e9adSlinfeng data_region_size: size - PAGE_SIZE,
77fae6e9adSlinfeng lost: 0,
78fae6e9adSlinfeng phys_addr,
79fae6e9adSlinfeng }
80fae6e9adSlinfeng }
81fae6e9adSlinfeng
can_write(&self, data_size: usize, data_tail: usize, data_head: usize) -> bool82fae6e9adSlinfeng fn can_write(&self, data_size: usize, data_tail: usize, data_head: usize) -> bool {
83fae6e9adSlinfeng if (data_head + 1) % self.data_region_size == data_tail {
84fae6e9adSlinfeng // The buffer is full
85fae6e9adSlinfeng return false;
86fae6e9adSlinfeng }
87fae6e9adSlinfeng let capacity = if data_head >= data_tail {
88fae6e9adSlinfeng self.data_region_size - data_head + data_tail
89fae6e9adSlinfeng } else {
90fae6e9adSlinfeng data_tail - data_head
91fae6e9adSlinfeng };
92fae6e9adSlinfeng data_size <= capacity
93fae6e9adSlinfeng }
94fae6e9adSlinfeng
write_event(&mut self, data: &[u8]) -> Result<()>95fae6e9adSlinfeng pub fn write_event(&mut self, data: &[u8]) -> Result<()> {
96fae6e9adSlinfeng let data_tail = unsafe { &mut (*(self.ptr as *mut perf_event_mmap_page)).data_tail };
97fae6e9adSlinfeng let data_head = unsafe { &mut (*(self.ptr as *mut perf_event_mmap_page)).data_head };
98fae6e9adSlinfeng // data_tail..data_head is the region that can be written
99fae6e9adSlinfeng // check if there is enough space to write the event
100fae6e9adSlinfeng let sample_size = PerfSample::calculate_size(data.len());
101fae6e9adSlinfeng
102fae6e9adSlinfeng let can_write_sample =
103fae6e9adSlinfeng self.can_write(sample_size, *data_tail as usize, *data_head as usize);
104fae6e9adSlinfeng // log::error!(
105fae6e9adSlinfeng // "can_write_sample: {}, data_tail: {}, data_head: {}, data.len(): {}, region_size: {}",
106fae6e9adSlinfeng // can_write_sample,
107fae6e9adSlinfeng // *data_tail,
108fae6e9adSlinfeng // *data_head,
109fae6e9adSlinfeng // data.len(),
110fae6e9adSlinfeng // self.data_region_size
111fae6e9adSlinfeng // );
112fae6e9adSlinfeng if !can_write_sample {
113fae6e9adSlinfeng //we need record it to the lost record
114fae6e9adSlinfeng self.lost += 1;
115fae6e9adSlinfeng // log::error!(
116fae6e9adSlinfeng // "Lost record: {}, data_tail: {}, data_head: {}",
117fae6e9adSlinfeng // self.lost,
118fae6e9adSlinfeng // *data_tail,
119fae6e9adSlinfeng // *data_head
120fae6e9adSlinfeng // );
121fae6e9adSlinfeng Ok(())
122fae6e9adSlinfeng } else {
123fae6e9adSlinfeng // we can write the sample to the page
124fae6e9adSlinfeng // If the lost record is not zero, we need to write the lost record first.
125fae6e9adSlinfeng let can_write_lost_record = self.can_write(
126fae6e9adSlinfeng size_of::<LostSamples>(),
127fae6e9adSlinfeng *data_tail as usize,
128fae6e9adSlinfeng *data_head as usize,
129fae6e9adSlinfeng );
130fae6e9adSlinfeng if self.lost > 0 && can_write_lost_record {
131fae6e9adSlinfeng let new_data_head = self.write_lost(*data_head as usize)?;
132fae6e9adSlinfeng *data_head = new_data_head as u64;
133fae6e9adSlinfeng // log::info!(
134fae6e9adSlinfeng // "Write lost record: {}, data_tail: {}, new_data_head: {}",
135fae6e9adSlinfeng // self.lost,
136fae6e9adSlinfeng // *data_tail,
137fae6e9adSlinfeng // *data_head
138fae6e9adSlinfeng // );
139fae6e9adSlinfeng self.lost = 0;
140fae6e9adSlinfeng self.write_event(data)
141fae6e9adSlinfeng } else {
142fae6e9adSlinfeng let new_data_head = self.write_sample(data, *data_head as usize)?;
143fae6e9adSlinfeng *data_head = new_data_head as u64;
144fae6e9adSlinfeng // log::info!(
145fae6e9adSlinfeng // "Write sample record, data_tail: {}, new_data_head: {}",
146fae6e9adSlinfeng // *data_tail,
147fae6e9adSlinfeng // *data_head
148fae6e9adSlinfeng // );
149fae6e9adSlinfeng Ok(())
150fae6e9adSlinfeng }
151fae6e9adSlinfeng }
152fae6e9adSlinfeng }
153fae6e9adSlinfeng
154fae6e9adSlinfeng /// Write any data to the page.
155fae6e9adSlinfeng ///
156fae6e9adSlinfeng /// Return the new data_head
write_any(&mut self, data: &[u8], data_head: usize) -> Result<usize>157fae6e9adSlinfeng fn write_any(&mut self, data: &[u8], data_head: usize) -> Result<usize> {
158fae6e9adSlinfeng let data_region_len = self.data_region_size;
159fae6e9adSlinfeng let data_region = self.as_mut_slice()[PAGE_SIZE..].as_mut();
160fae6e9adSlinfeng let data_len = data.len();
161fae6e9adSlinfeng let end = (data_head + data_len) % data_region_len;
162fae6e9adSlinfeng let start = data_head;
163fae6e9adSlinfeng if start < end {
164fae6e9adSlinfeng data_region[start..end].copy_from_slice(data);
165fae6e9adSlinfeng } else {
166fae6e9adSlinfeng let first_len = data_region_len - start;
167fae6e9adSlinfeng data_region[start..start + first_len].copy_from_slice(&data[..first_len]);
168fae6e9adSlinfeng data_region[0..end].copy_from_slice(&data[first_len..]);
169fae6e9adSlinfeng }
170fae6e9adSlinfeng Ok(end)
171fae6e9adSlinfeng }
172fae6e9adSlinfeng
173fae6e9adSlinfeng /// Write a sample to the page.
write_sample(&mut self, data: &[u8], data_head: usize) -> Result<usize>174fae6e9adSlinfeng fn write_sample(&mut self, data: &[u8], data_head: usize) -> Result<usize> {
175fae6e9adSlinfeng let perf_sample = PerfSample {
176fae6e9adSlinfeng s_hdr: SampleHeader {
177fae6e9adSlinfeng header: perf_event_header {
178fae6e9adSlinfeng type_: perf_event_type::PERF_RECORD_SAMPLE as u32,
179fae6e9adSlinfeng misc: 0,
180fae6e9adSlinfeng size: size_of::<SampleHeader>() as u16 + data.len() as u16,
181fae6e9adSlinfeng },
182fae6e9adSlinfeng size: data.len() as u32,
183fae6e9adSlinfeng },
184fae6e9adSlinfeng value: data,
185fae6e9adSlinfeng };
186fae6e9adSlinfeng let new_head = self.write_any(perf_sample.s_hdr.as_bytes(), data_head)?;
187fae6e9adSlinfeng self.write_any(perf_sample.value, new_head)
188fae6e9adSlinfeng }
189fae6e9adSlinfeng
190fae6e9adSlinfeng /// Write a lost record to the page.
191fae6e9adSlinfeng ///
192fae6e9adSlinfeng /// Return the new data_head
write_lost(&mut self, data_head: usize) -> Result<usize>193fae6e9adSlinfeng fn write_lost(&mut self, data_head: usize) -> Result<usize> {
194fae6e9adSlinfeng let lost = LostSamples {
195fae6e9adSlinfeng header: perf_event_header {
196fae6e9adSlinfeng type_: perf_event_type::PERF_RECORD_LOST as u32,
197fae6e9adSlinfeng misc: 0,
198fae6e9adSlinfeng size: size_of::<LostSamples>() as u16,
199fae6e9adSlinfeng },
200fae6e9adSlinfeng id: 0,
201fae6e9adSlinfeng count: self.lost as u64,
202fae6e9adSlinfeng };
203fae6e9adSlinfeng self.write_any(lost.as_bytes(), data_head)
204fae6e9adSlinfeng }
205fae6e9adSlinfeng
readable(&self) -> bool206fae6e9adSlinfeng pub fn readable(&self) -> bool {
207fae6e9adSlinfeng let data_tail = unsafe { &(*(self.ptr as *mut perf_event_mmap_page)).data_tail };
208fae6e9adSlinfeng let data_head = unsafe { &(*(self.ptr as *mut perf_event_mmap_page)).data_head };
209fae6e9adSlinfeng data_tail != data_head
210fae6e9adSlinfeng }
211*7c28051eSlinfeng
212*7c28051eSlinfeng #[allow(dead_code)]
as_slice(&self) -> &[u8]213fae6e9adSlinfeng pub fn as_slice(&self) -> &[u8] {
214fae6e9adSlinfeng unsafe { core::slice::from_raw_parts(self.ptr as *const u8, self.size) }
215fae6e9adSlinfeng }
as_mut_slice(&mut self) -> &mut [u8]216fae6e9adSlinfeng pub fn as_mut_slice(&mut self) -> &mut [u8] {
217fae6e9adSlinfeng unsafe { core::slice::from_raw_parts_mut(self.ptr as *mut u8, self.size) }
218fae6e9adSlinfeng }
219fae6e9adSlinfeng }
220fae6e9adSlinfeng
impl BpfPerfEvent {
    /// Create a new, initially-disabled BPF perf event with an empty ring.
    pub fn new(args: PerfProbeArgs) -> Self {
        BpfPerfEvent {
            _args: args,
            data: SpinLock::new(BpfPerfEventData {
                enabled: false,
                mmap_page: RingPage::empty(),
                page_cache: PageCache::new(None),
                offset: 0,
            }),
        }
    }
    /// Back an mmap request of `len` bytes with freshly allocated physically
    /// contiguous frames and initialize the ring buffer over them.
    ///
    /// The frames are registered with the global page manager and inserted
    /// into this inode's page cache so the mm layer can map them into user
    /// space. `len` must be a PAGE_SIZE multiple of at least two pages
    /// (enforced by asserts in `RingPage::init`).
    pub fn do_mmap(&self, _start: usize, len: usize, offset: usize) -> Result<()> {
        let mut data = self.data.lock();
        // alloc page frame
        let (phy_addr, page_count) =
            unsafe { LockedFrameAllocator.allocate(PageFrameCount::new(len / PAGE_SIZE)) }
                .ok_or(SystemError::ENOSPC)?;
        let mut page_manager_guard = page_manager_lock_irqsave();
        let mut cur_phys = PhysPageFrame::new(phy_addr);
        for i in 0..page_count.data() {
            let page = Arc::new(Page::new(true, cur_phys.phys_address()));
            let paddr = cur_phys.phys_address();
            page_manager_guard.insert(paddr, &page);
            data.page_cache.add_page(i, &page);
            cur_phys = cur_phys.next();
        }
        // NOTE(review): if phys_2_virt fails here, the frames allocated and
        // registered above are not rolled back — confirm this leak path.
        let virt_addr = unsafe { MMArch::phys_2_virt(phy_addr) }.ok_or(SystemError::EFAULT)?;
        // create mmap page
        let mmap_page = RingPage::new_init(virt_addr.data(), len, phy_addr);
        data.mmap_page = mmap_page;
        data.offset = offset;
        Ok(())
    }

    /// Append one sample to the ring buffer (see `RingPage::write_event`).
    pub fn write_event(&self, data: &[u8]) -> Result<()> {
        let mut inner_data = self.data.lock();
        inner_data.mmap_page.write_event(data)?;
        Ok(())
    }
}
262fae6e9adSlinfeng
impl Drop for BpfPerfEvent {
    /// Unregister the ring-buffer frames from the global page manager when
    /// the event is destroyed. Safe to run even if `do_mmap` never ran:
    /// an empty ring has size 0, so the loop body never executes.
    fn drop(&mut self) {
        let mut page_manager_guard = page_manager_lock_irqsave();
        let data = self.data.lock();
        let phy_addr = data.mmap_page.phys_addr;
        let len = data.mmap_page.size;
        let page_count = PageFrameCount::new(len / PAGE_SIZE);
        let mut cur_phys = PhysPageFrame::new(phy_addr);
        for _ in 0..page_count.data() {
            // NOTE(review): this removes the page-manager entry but does not
            // visibly return the frame to LockedFrameAllocator — confirm the
            // Page drop path frees the physical memory.
            page_manager_guard.remove_page(&cur_phys.phys_address());
            cur_phys = cur_phys.next();
        }
    }
}
277fae6e9adSlinfeng
impl IndexNode for BpfPerfEvent {
    /// Delegate mmap requests to `do_mmap`, which allocates and initializes
    /// the ring buffer.
    fn mmap(&self, start: usize, len: usize, offset: usize) -> Result<()> {
        self.do_mmap(start, len, offset)
    }

    /// Reading is not supported; data is consumed through the mmap'ed ring.
    /// NOTE(review): panicking on a user-reachable file operation is harsh —
    /// consider returning an error (e.g. ENOSYS) instead; verify callers.
    fn read_at(
        &self,
        _offset: usize,
        _len: usize,
        _buf: &mut [u8],
        _data: SpinLockGuard<FilePrivateData>,
    ) -> Result<usize> {
        panic!("PerfEventInode does not support read")
    }

    /// Writing is not supported; samples are produced by the kernel side.
    /// NOTE(review): same panic concern as `read_at` above.
    fn write_at(
        &self,
        _offset: usize,
        _len: usize,
        _buf: &[u8],
        _data: SpinLockGuard<FilePrivateData>,
    ) -> Result<usize> {
        panic!("PerfEventInode does not support write")
    }

    /// This inode belongs to no filesystem.
    fn fs(&self) -> Arc<dyn FileSystem> {
        panic!("PerfEventInode does not have a filesystem")
    }

    /// Upcast for dynamic type checks.
    fn as_any_ref(&self) -> &dyn Any {
        self
    }
    /// Not a directory: listing is unsupported.
    fn list(&self) -> Result<Vec<String>> {
        Err(SystemError::ENOSYS)
    }

    /// Expose the page cache backing the mmap'ed ring buffer.
    fn page_cache(&self) -> Option<Arc<PageCache>> {
        Some(self.data.lock().page_cache.clone())
    }
}
318fae6e9adSlinfeng
319fae6e9adSlinfeng impl PerfEventOps for BpfPerfEvent {
enable(&self) -> Result<()>320fae6e9adSlinfeng fn enable(&self) -> Result<()> {
321fae6e9adSlinfeng self.data.lock().enabled = true;
322fae6e9adSlinfeng Ok(())
323fae6e9adSlinfeng }
disable(&self) -> Result<()>324fae6e9adSlinfeng fn disable(&self) -> Result<()> {
325fae6e9adSlinfeng self.data.lock().enabled = false;
326fae6e9adSlinfeng Ok(())
327fae6e9adSlinfeng }
readable(&self) -> bool328fae6e9adSlinfeng fn readable(&self) -> bool {
329fae6e9adSlinfeng self.data.lock().mmap_page.readable()
330fae6e9adSlinfeng }
331fae6e9adSlinfeng }
332fae6e9adSlinfeng
perf_event_open_bpf(args: PerfProbeArgs) -> BpfPerfEvent333fae6e9adSlinfeng pub fn perf_event_open_bpf(args: PerfProbeArgs) -> BpfPerfEvent {
334fae6e9adSlinfeng BpfPerfEvent::new(args)
335fae6e9adSlinfeng }
336