1 use super::{PerfEventOps, Result};
2 use crate::arch::mm::LockedFrameAllocator;
3 use crate::arch::MMArch;
4 use crate::filesystem::vfs::file::PageCache;
5 use crate::filesystem::vfs::{FilePrivateData, FileSystem, IndexNode};
6 use crate::include::bindings::linux_bpf::{
7 perf_event_header, perf_event_mmap_page, perf_event_type,
8 };
9 use crate::libs::spinlock::{SpinLock, SpinLockGuard};
10 use crate::mm::allocator::page_frame::{FrameAllocator, PageFrameCount, PhysPageFrame};
11 use crate::mm::page::{page_manager_lock_irqsave, Page};
12 use crate::mm::{MemoryManagementArch, PhysAddr};
13 use crate::perf::util::{LostSamples, PerfProbeArgs, PerfSample, SampleHeader};
14 use alloc::string::String;
15 use alloc::sync::Arc;
16 use alloc::vec::Vec;
17 use core::any::Any;
18 use core::fmt::Debug;
19 use system_error::SystemError;
20 const PAGE_SIZE: usize = MMArch::PAGE_SIZE;
/// A perf event backed by an mmap-able ring buffer, used to hand samples
/// produced in the kernel over to userspace.
#[derive(Debug)]
pub struct BpfPerfEvent {
    /// Arguments passed at perf_event_open time; currently kept only for reference.
    _args: PerfProbeArgs,
    /// Mutable state (enable flag, ring buffer, page cache) guarded by a spinlock.
    data: SpinLock<BpfPerfEventData>,
}
26
/// Lock-protected state of a [`BpfPerfEvent`].
#[derive(Debug)]
pub struct BpfPerfEventData {
    /// Whether the event is currently enabled (set via PerfEventOps).
    enabled: bool,
    /// The ring buffer shared with userspace; empty until `do_mmap` runs.
    mmap_page: RingPage,
    /// Page cache holding the frames that back the mmap region.
    page_cache: Arc<PageCache>,
    /// Offset passed to the most recent mmap call.
    offset: usize,
}
34
/// A perf ring buffer: one control page (`perf_event_mmap_page`) followed by
/// a data region of whole pages.
#[derive(Debug)]
pub struct RingPage {
    /// Total size of the mapping in bytes (control page + data region).
    size: usize,
    /// Kernel virtual address of the start of the mapping.
    ptr: usize,
    /// Size of the data region, i.e. `size - PAGE_SIZE`.
    data_region_size: usize,
    /// Number of samples dropped since the last PERF_RECORD_LOST was emitted.
    lost: usize,
    /// Physical address of the first backing frame.
    phys_addr: PhysAddr,
}
43
44 impl RingPage {
empty() -> Self45 pub fn empty() -> Self {
46 RingPage {
47 ptr: 0,
48 size: 0,
49 data_region_size: 0,
50 lost: 0,
51 phys_addr: PhysAddr::new(0),
52 }
53 }
54
new_init(start: usize, len: usize, phys_addr: PhysAddr) -> Self55 pub fn new_init(start: usize, len: usize, phys_addr: PhysAddr) -> Self {
56 Self::init(start as _, len, phys_addr)
57 }
58
init(ptr: *mut u8, size: usize, phys_addr: PhysAddr) -> Self59 fn init(ptr: *mut u8, size: usize, phys_addr: PhysAddr) -> Self {
60 assert_eq!(size % PAGE_SIZE, 0);
61 assert!(size / PAGE_SIZE >= 2);
62 // The first page will be filled with perf_event_mmap_page
63 unsafe {
64 let perf_event_mmap_page = &mut *(ptr as *mut perf_event_mmap_page);
65 perf_event_mmap_page.data_offset = PAGE_SIZE as u64;
66 perf_event_mmap_page.data_size = (size - PAGE_SIZE) as u64;
67 // user will read sample or lost record from data_tail
68 perf_event_mmap_page.data_tail = 0;
69 // kernel will write sample or lost record from data_head
70 perf_event_mmap_page.data_head = 0;
71 // It is a ring buffer.
72 }
73 RingPage {
74 ptr: ptr as usize,
75 size,
76 data_region_size: size - PAGE_SIZE,
77 lost: 0,
78 phys_addr,
79 }
80 }
81
can_write(&self, data_size: usize, data_tail: usize, data_head: usize) -> bool82 fn can_write(&self, data_size: usize, data_tail: usize, data_head: usize) -> bool {
83 if (data_head + 1) % self.data_region_size == data_tail {
84 // The buffer is full
85 return false;
86 }
87 let capacity = if data_head >= data_tail {
88 self.data_region_size - data_head + data_tail
89 } else {
90 data_tail - data_head
91 };
92 data_size <= capacity
93 }
94
write_event(&mut self, data: &[u8]) -> Result<()>95 pub fn write_event(&mut self, data: &[u8]) -> Result<()> {
96 let data_tail = unsafe { &mut (*(self.ptr as *mut perf_event_mmap_page)).data_tail };
97 let data_head = unsafe { &mut (*(self.ptr as *mut perf_event_mmap_page)).data_head };
98 // data_tail..data_head is the region that can be written
99 // check if there is enough space to write the event
100 let sample_size = PerfSample::calculate_size(data.len());
101
102 let can_write_sample =
103 self.can_write(sample_size, *data_tail as usize, *data_head as usize);
104 // log::error!(
105 // "can_write_sample: {}, data_tail: {}, data_head: {}, data.len(): {}, region_size: {}",
106 // can_write_sample,
107 // *data_tail,
108 // *data_head,
109 // data.len(),
110 // self.data_region_size
111 // );
112 if !can_write_sample {
113 //we need record it to the lost record
114 self.lost += 1;
115 // log::error!(
116 // "Lost record: {}, data_tail: {}, data_head: {}",
117 // self.lost,
118 // *data_tail,
119 // *data_head
120 // );
121 Ok(())
122 } else {
123 // we can write the sample to the page
124 // If the lost record is not zero, we need to write the lost record first.
125 let can_write_lost_record = self.can_write(
126 size_of::<LostSamples>(),
127 *data_tail as usize,
128 *data_head as usize,
129 );
130 if self.lost > 0 && can_write_lost_record {
131 let new_data_head = self.write_lost(*data_head as usize)?;
132 *data_head = new_data_head as u64;
133 // log::info!(
134 // "Write lost record: {}, data_tail: {}, new_data_head: {}",
135 // self.lost,
136 // *data_tail,
137 // *data_head
138 // );
139 self.lost = 0;
140 self.write_event(data)
141 } else {
142 let new_data_head = self.write_sample(data, *data_head as usize)?;
143 *data_head = new_data_head as u64;
144 // log::info!(
145 // "Write sample record, data_tail: {}, new_data_head: {}",
146 // *data_tail,
147 // *data_head
148 // );
149 Ok(())
150 }
151 }
152 }
153
154 /// Write any data to the page.
155 ///
156 /// Return the new data_head
write_any(&mut self, data: &[u8], data_head: usize) -> Result<usize>157 fn write_any(&mut self, data: &[u8], data_head: usize) -> Result<usize> {
158 let data_region_len = self.data_region_size;
159 let data_region = self.as_mut_slice()[PAGE_SIZE..].as_mut();
160 let data_len = data.len();
161 let end = (data_head + data_len) % data_region_len;
162 let start = data_head;
163 if start < end {
164 data_region[start..end].copy_from_slice(data);
165 } else {
166 let first_len = data_region_len - start;
167 data_region[start..start + first_len].copy_from_slice(&data[..first_len]);
168 data_region[0..end].copy_from_slice(&data[first_len..]);
169 }
170 Ok(end)
171 }
172
173 /// Write a sample to the page.
write_sample(&mut self, data: &[u8], data_head: usize) -> Result<usize>174 fn write_sample(&mut self, data: &[u8], data_head: usize) -> Result<usize> {
175 let perf_sample = PerfSample {
176 s_hdr: SampleHeader {
177 header: perf_event_header {
178 type_: perf_event_type::PERF_RECORD_SAMPLE as u32,
179 misc: 0,
180 size: size_of::<SampleHeader>() as u16 + data.len() as u16,
181 },
182 size: data.len() as u32,
183 },
184 value: data,
185 };
186 let new_head = self.write_any(perf_sample.s_hdr.as_bytes(), data_head)?;
187 self.write_any(perf_sample.value, new_head)
188 }
189
190 /// Write a lost record to the page.
191 ///
192 /// Return the new data_head
write_lost(&mut self, data_head: usize) -> Result<usize>193 fn write_lost(&mut self, data_head: usize) -> Result<usize> {
194 let lost = LostSamples {
195 header: perf_event_header {
196 type_: perf_event_type::PERF_RECORD_LOST as u32,
197 misc: 0,
198 size: size_of::<LostSamples>() as u16,
199 },
200 id: 0,
201 count: self.lost as u64,
202 };
203 self.write_any(lost.as_bytes(), data_head)
204 }
205
readable(&self) -> bool206 pub fn readable(&self) -> bool {
207 let data_tail = unsafe { &(*(self.ptr as *mut perf_event_mmap_page)).data_tail };
208 let data_head = unsafe { &(*(self.ptr as *mut perf_event_mmap_page)).data_head };
209 data_tail != data_head
210 }
as_slice(&self) -> &[u8]211 pub fn as_slice(&self) -> &[u8] {
212 unsafe { core::slice::from_raw_parts(self.ptr as *const u8, self.size) }
213 }
as_mut_slice(&mut self) -> &mut [u8]214 pub fn as_mut_slice(&mut self) -> &mut [u8] {
215 unsafe { core::slice::from_raw_parts_mut(self.ptr as *mut u8, self.size) }
216 }
217 }
218
219 impl BpfPerfEvent {
new(args: PerfProbeArgs) -> Self220 pub fn new(args: PerfProbeArgs) -> Self {
221 BpfPerfEvent {
222 _args: args,
223 data: SpinLock::new(BpfPerfEventData {
224 enabled: false,
225 mmap_page: RingPage::empty(),
226 page_cache: PageCache::new(None),
227 offset: 0,
228 }),
229 }
230 }
do_mmap(&self, _start: usize, len: usize, offset: usize) -> Result<()>231 pub fn do_mmap(&self, _start: usize, len: usize, offset: usize) -> Result<()> {
232 let mut data = self.data.lock();
233 // alloc page frame
234 let (phy_addr, page_count) =
235 unsafe { LockedFrameAllocator.allocate(PageFrameCount::new(len / PAGE_SIZE)) }
236 .ok_or(SystemError::ENOSPC)?;
237 let mut page_manager_guard = page_manager_lock_irqsave();
238 let mut cur_phys = PhysPageFrame::new(phy_addr);
239 for i in 0..page_count.data() {
240 let page = Arc::new(Page::new(true, cur_phys.phys_address()));
241 let paddr = cur_phys.phys_address();
242 page_manager_guard.insert(paddr, &page);
243 data.page_cache.add_page(i, &page);
244 cur_phys = cur_phys.next();
245 }
246 let virt_addr = unsafe { MMArch::phys_2_virt(phy_addr) }.ok_or(SystemError::EFAULT)?;
247 // create mmap page
248 let mmap_page = RingPage::new_init(virt_addr.data(), len, phy_addr);
249 data.mmap_page = mmap_page;
250 data.offset = offset;
251 Ok(())
252 }
253
write_event(&self, data: &[u8]) -> Result<()>254 pub fn write_event(&self, data: &[u8]) -> Result<()> {
255 let mut inner_data = self.data.lock();
256 inner_data.mmap_page.write_event(data)?;
257 Ok(())
258 }
259 }
260
impl Drop for BpfPerfEvent {
    /// Unregister every frame of the mmap region from the global page manager
    /// when the event is destroyed.
    fn drop(&mut self) {
        let mut page_manager_guard = page_manager_lock_irqsave();
        let data = self.data.lock();
        let phy_addr = data.mmap_page.phys_addr;
        let len = data.mmap_page.size;
        // If do_mmap never ran, size is 0 and the loop below is a no-op.
        let page_count = PageFrameCount::new(len / PAGE_SIZE);
        let mut cur_phys = PhysPageFrame::new(phy_addr);
        for _ in 0..page_count.data() {
            // NOTE(review): this only removes the page-manager entry; the
            // frames are not explicitly returned to LockedFrameAllocator here
            // — presumably dropping the last Arc<Page> releases them. TODO:
            // confirm against Page's drop/remove_page semantics.
            page_manager_guard.remove_page(&cur_phys.phys_address());
            cur_phys = cur_phys.next();
        }
    }
}
275
276 impl IndexNode for BpfPerfEvent {
mmap(&self, start: usize, len: usize, offset: usize) -> Result<()>277 fn mmap(&self, start: usize, len: usize, offset: usize) -> Result<()> {
278 self.do_mmap(start, len, offset)
279 }
280
read_at( &self, _offset: usize, _len: usize, _buf: &mut [u8], _data: SpinLockGuard<FilePrivateData>, ) -> Result<usize>281 fn read_at(
282 &self,
283 _offset: usize,
284 _len: usize,
285 _buf: &mut [u8],
286 _data: SpinLockGuard<FilePrivateData>,
287 ) -> Result<usize> {
288 panic!("PerfEventInode does not support read")
289 }
290
write_at( &self, _offset: usize, _len: usize, _buf: &[u8], _data: SpinLockGuard<FilePrivateData>, ) -> Result<usize>291 fn write_at(
292 &self,
293 _offset: usize,
294 _len: usize,
295 _buf: &[u8],
296 _data: SpinLockGuard<FilePrivateData>,
297 ) -> Result<usize> {
298 panic!("PerfEventInode does not support write")
299 }
300
fs(&self) -> Arc<dyn FileSystem>301 fn fs(&self) -> Arc<dyn FileSystem> {
302 panic!("PerfEventInode does not have a filesystem")
303 }
304
as_any_ref(&self) -> &dyn Any305 fn as_any_ref(&self) -> &dyn Any {
306 self
307 }
list(&self) -> Result<Vec<String>>308 fn list(&self) -> Result<Vec<String>> {
309 Err(SystemError::ENOSYS)
310 }
311
page_cache(&self) -> Option<Arc<PageCache>>312 fn page_cache(&self) -> Option<Arc<PageCache>> {
313 Some(self.data.lock().page_cache.clone())
314 }
315 }
316
317 impl PerfEventOps for BpfPerfEvent {
enable(&self) -> Result<()>318 fn enable(&self) -> Result<()> {
319 self.data.lock().enabled = true;
320 Ok(())
321 }
disable(&self) -> Result<()>322 fn disable(&self) -> Result<()> {
323 self.data.lock().enabled = false;
324 Ok(())
325 }
readable(&self) -> bool326 fn readable(&self) -> bool {
327 self.data.lock().mmap_page.readable()
328 }
329 }
330
/// Create a BPF-backed perf event from `perf_event_open` arguments.
pub fn perf_event_open_bpf(args: PerfProbeArgs) -> BpfPerfEvent {
    BpfPerfEvent::new(args)
}
334