use super::{PerfEventOps, Result};
use crate::arch::mm::LockedFrameAllocator;
use crate::arch::MMArch;
use crate::filesystem::vfs::file::PageCache;
use crate::filesystem::vfs::{FilePrivateData, FileSystem, IndexNode};
use crate::include::bindings::linux_bpf::{
    perf_event_header, perf_event_mmap_page, perf_event_type,
};
use crate::libs::spinlock::{SpinLock, SpinLockGuard};
use crate::mm::allocator::page_frame::{FrameAllocator, PageFrameCount, PhysPageFrame};
use crate::mm::page::{page_manager_lock_irqsave, Page};
use crate::mm::{MemoryManagementArch, PhysAddr};
use crate::perf::util::{LostSamples, PerfProbeArgs, PerfSample, SampleHeader};
use alloc::string::String;
use alloc::sync::Arc;
use alloc::vec::Vec;
use core::any::Any;
use core::fmt::Debug;
use core::mem::size_of;
use system_error::SystemError;

const PAGE_SIZE: usize = MMArch::PAGE_SIZE;

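/// A BPF perf event whose samples are handed to user space through a
/// memory-mapped ring buffer, following the `perf_event_mmap_page` layout
/// used by Linux perf.
///
/// Minimal usage sketch (kernel-internal API; the construction of `args`,
/// the sample payload, and the error handling are elided, and the mapping
/// size is illustrative):
///
/// ```ignore
/// let event = perf_event_open_bpf(args);
/// // Map one metadata page plus the data region before writing samples.
/// event.do_mmap(0, 2 * PAGE_SIZE, 0)?;
/// event.enable()?;
/// event.write_event(&sample_bytes)?;
/// ```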
#[derive(Debug)]
pub struct BpfPerfEvent {
    _args: PerfProbeArgs,
    data: SpinLock<BpfPerfEventData>,
}

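/// Mutable state of a [`BpfPerfEvent`], guarded by the event's spinlock.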
#[derive(Debug)]
pub struct BpfPerfEventData {
    enabled: bool,
    /// The ring buffer shared with user space.
    mmap_page: RingPage,
    /// Page cache serving user-space `mmap` of this event.
    page_cache: Arc<PageCache>,
    /// File offset at which the ring buffer was mapped.
    offset: usize,
}

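/// The memory backing one event's mmap region: a `perf_event_mmap_page`
/// metadata page followed by the ring-buffer data region.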
#[derive(Debug)]
pub struct RingPage {
    /// Total size of the mapping in bytes, including the metadata page.
    size: usize,
    /// Kernel virtual address of the mapping.
    ptr: usize,
    /// Size of the data region in bytes (`size - PAGE_SIZE`).
    data_region_size: usize,
    /// Samples dropped since the last `PERF_RECORD_LOST` record was written.
    lost: usize,
    /// Physical address of the first frame of the mapping.
    phys_addr: PhysAddr,
}

impl RingPage {
    pub fn empty() -> Self {
        RingPage {
            ptr: 0,
            size: 0,
            data_region_size: 0,
            lost: 0,
            phys_addr: PhysAddr::new(0),
        }
    }

    pub fn new_init(start: usize, len: usize, phys_addr: PhysAddr) -> Self {
        Self::init(start as _, len, phys_addr)
    }

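    /// Lay out the mapping: the first page is filled with the
    /// `perf_event_mmap_page` metadata, and the remaining pages form the
    /// data region of the ring buffer shared with user space.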
    fn init(ptr: *mut u8, size: usize, phys_addr: PhysAddr) -> Self {
        assert_eq!(size % PAGE_SIZE, 0);
        assert!(size / PAGE_SIZE >= 2);
        // The first page will be filled with perf_event_mmap_page
        unsafe {
            let perf_event_mmap_page = &mut *(ptr as *mut perf_event_mmap_page);
            perf_event_mmap_page.data_offset = PAGE_SIZE as u64;
            perf_event_mmap_page.data_size = (size - PAGE_SIZE) as u64;
            // User space reads samples and lost records starting at data_tail.
            perf_event_mmap_page.data_tail = 0;
            // The kernel writes samples and lost records starting at data_head.
            perf_event_mmap_page.data_head = 0;
            // Together the two indices form a ring buffer over the data region.
        }
        RingPage {
            ptr: ptr as usize,
            size,
            data_region_size: size - PAGE_SIZE,
            lost: 0,
            phys_addr,
        }
    }

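    /// Check whether `data_size` bytes fit in the free space between
    /// `data_head` and `data_tail`; the buffer counts as full once
    /// advancing `data_head` would catch up with `data_tail`.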
    fn can_write(&self, data_size: usize, data_tail: usize, data_head: usize) -> bool {
        if (data_head + 1) % self.data_region_size == data_tail {
            // The buffer is full
            return false;
        }
        let capacity = if data_head >= data_tail {
            self.data_region_size - data_head + data_tail
        } else {
            data_tail - data_head
        };
        data_size <= capacity
    }

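    /// Write a sample record into the ring buffer.
    ///
    /// If the buffer is full, the sample is dropped and counted in `lost`;
    /// the next write that finds room first emits a `PERF_RECORD_LOST`
    /// record reporting how many samples were dropped.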
    pub fn write_event(&mut self, data: &[u8]) -> Result<()> {
        let data_tail = unsafe { &mut (*(self.ptr as *mut perf_event_mmap_page)).data_tail };
        let data_head = unsafe { &mut (*(self.ptr as *mut perf_event_mmap_page)).data_head };
        // data_tail..data_head holds records not yet consumed by user space;
        // check whether the remaining space can hold the new event.
        let sample_size = PerfSample::calculate_size(data.len());

        let can_write_sample =
            self.can_write(sample_size, *data_tail as usize, *data_head as usize);
        if !can_write_sample {
            // No room: account for the sample in the lost-record counter.
            self.lost += 1;
            Ok(())
        } else {
            // If any samples were lost, report them first so the reader sees
            // the gap before the new sample.
            let can_write_lost_record = self.can_write(
                size_of::<LostSamples>(),
                *data_tail as usize,
                *data_head as usize,
            );
            if self.lost > 0 && can_write_lost_record {
                let new_data_head = self.write_lost(*data_head as usize)?;
                *data_head = new_data_head as u64;
                self.lost = 0;
                self.write_event(data)
            } else {
                let new_data_head = self.write_sample(data, *data_head as usize)?;
                *data_head = new_data_head as u64;
                Ok(())
            }
        }
    }

    /// Write raw bytes into the data region at `data_head`, wrapping around
    /// the end of the region if necessary.
    ///
    /// Return the new data_head
    fn write_any(&mut self, data: &[u8], data_head: usize) -> Result<usize> {
        let data_region_len = self.data_region_size;
        let data_region = &mut self.as_mut_slice()[PAGE_SIZE..];
        let data_len = data.len();
        let end = (data_head + data_len) % data_region_len;
        let start = data_head;
        if start < end {
            data_region[start..end].copy_from_slice(data);
        } else {
            // The write wraps: fill to the end of the region, then continue
            // from the beginning.
            let first_len = data_region_len - start;
            data_region[start..start + first_len].copy_from_slice(&data[..first_len]);
            data_region[0..end].copy_from_slice(&data[first_len..]);
        }
        Ok(end)
    }

    /// Write a `PERF_RECORD_SAMPLE` record (header followed by payload) to
    /// the page.
    ///
    /// Return the new data_head
    fn write_sample(&mut self, data: &[u8], data_head: usize) -> Result<usize> {
        let perf_sample = PerfSample {
            s_hdr: SampleHeader {
                header: perf_event_header {
                    type_: perf_event_type::PERF_RECORD_SAMPLE as u32,
                    misc: 0,
                    size: size_of::<SampleHeader>() as u16 + data.len() as u16,
                },
                size: data.len() as u32,
            },
            value: data,
        };
        let new_head = self.write_any(perf_sample.s_hdr.as_bytes(), data_head)?;
        self.write_any(perf_sample.value, new_head)
    }

    /// Write a `PERF_RECORD_LOST` record reporting the number of dropped
    /// samples.
    ///
    /// Return the new data_head
    fn write_lost(&mut self, data_head: usize) -> Result<usize> {
        let lost = LostSamples {
            header: perf_event_header {
                type_: perf_event_type::PERF_RECORD_LOST as u32,
                misc: 0,
                size: size_of::<LostSamples>() as u16,
            },
            id: 0,
            count: self.lost as u64,
        };
        self.write_any(lost.as_bytes(), data_head)
    }

    /// Whether the ring buffer contains records that user space has not
    /// consumed yet.
    pub fn readable(&self) -> bool {
        let data_tail = unsafe { &(*(self.ptr as *mut perf_event_mmap_page)).data_tail };
        let data_head = unsafe { &(*(self.ptr as *mut perf_event_mmap_page)).data_head };
        data_tail != data_head
    }

    #[allow(dead_code)]
    pub fn as_slice(&self) -> &[u8] {
        unsafe { core::slice::from_raw_parts(self.ptr as *const u8, self.size) }
    }

    pub fn as_mut_slice(&mut self) -> &mut [u8] {
        unsafe { core::slice::from_raw_parts_mut(self.ptr as *mut u8, self.size) }
    }
}

impl BpfPerfEvent {
    pub fn new(args: PerfProbeArgs) -> Self {
        BpfPerfEvent {
            _args: args,
            data: SpinLock::new(BpfPerfEventData {
                enabled: false,
                mmap_page: RingPage::empty(),
                page_cache: PageCache::new(None),
                offset: 0,
            }),
        }
    }
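
    /// Allocate the physical frames backing the ring buffer, register them
    /// with the page manager and this event's page cache, and initialize
    /// the `perf_event_mmap_page` metadata page.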
    pub fn do_mmap(&self, _start: usize, len: usize, offset: usize) -> Result<()> {
        let mut data = self.data.lock();
        // Allocate the physical frames backing the mapping.
        let (phy_addr, page_count) =
            unsafe { LockedFrameAllocator.allocate(PageFrameCount::new(len / PAGE_SIZE)) }
                .ok_or(SystemError::ENOSPC)?;
        let mut page_manager_guard = page_manager_lock_irqsave();
        let mut cur_phys = PhysPageFrame::new(phy_addr);
        for i in 0..page_count.data() {
            let page = Arc::new(Page::new(true, cur_phys.phys_address()));
            let paddr = cur_phys.phys_address();
            page_manager_guard.insert(paddr, &page);
            data.page_cache.add_page(i, &page);
            cur_phys = cur_phys.next();
        }
        let virt_addr = unsafe { MMArch::phys_2_virt(phy_addr) }.ok_or(SystemError::EFAULT)?;
        // Initialize the ring-buffer metadata page over the new mapping.
        let mmap_page = RingPage::new_init(virt_addr.data(), len, phy_addr);
        data.mmap_page = mmap_page;
        data.offset = offset;
        Ok(())
    }

    /// Write one sample into this event's ring buffer.
    pub fn write_event(&self, data: &[u8]) -> Result<()> {
        let mut inner_data = self.data.lock();
        inner_data.mmap_page.write_event(data)?;
        Ok(())
    }
}

impl Drop for BpfPerfEvent {
    fn drop(&mut self) {
        // Unregister the ring-buffer pages from the page manager when the
        // event goes away.
        let mut page_manager_guard = page_manager_lock_irqsave();
        let data = self.data.lock();
        let phy_addr = data.mmap_page.phys_addr;
        let len = data.mmap_page.size;
        let page_count = PageFrameCount::new(len / PAGE_SIZE);
        let mut cur_phys = PhysPageFrame::new(phy_addr);
        for _ in 0..page_count.data() {
            page_manager_guard.remove_page(&cur_phys.phys_address());
            cur_phys = cur_phys.next();
        }
    }
}

impl IndexNode for BpfPerfEvent {
    fn mmap(&self, start: usize, len: usize, offset: usize) -> Result<()> {
        self.do_mmap(start, len, offset)
    }

    fn read_at(
        &self,
        _offset: usize,
        _len: usize,
        _buf: &mut [u8],
        _data: SpinLockGuard<FilePrivateData>,
    ) -> Result<usize> {
        panic!("PerfEventInode does not support read")
    }

    fn write_at(
        &self,
        _offset: usize,
        _len: usize,
        _buf: &[u8],
        _data: SpinLockGuard<FilePrivateData>,
    ) -> Result<usize> {
        panic!("PerfEventInode does not support write")
    }

    fn fs(&self) -> Arc<dyn FileSystem> {
        panic!("PerfEventInode does not have a filesystem")
    }

    fn as_any_ref(&self) -> &dyn Any {
        self
    }

    fn list(&self) -> Result<Vec<String>> {
        Err(SystemError::ENOSYS)
    }

    fn page_cache(&self) -> Option<Arc<PageCache>> {
        Some(self.data.lock().page_cache.clone())
    }
}

impl PerfEventOps for BpfPerfEvent {
    fn enable(&self) -> Result<()> {
        self.data.lock().enabled = true;
        Ok(())
    }

    fn disable(&self) -> Result<()> {
        self.data.lock().enabled = false;
        Ok(())
    }

    fn readable(&self) -> bool {
        self.data.lock().mmap_page.readable()
    }
}

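/// Create a [`BpfPerfEvent`] for the given probe arguments.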
pub fn perf_event_open_bpf(args: PerfProbeArgs) -> BpfPerfEvent {
    BpfPerfEvent::new(args)
}