use super::{PerfEventOps, Result};
use crate::arch::mm::LockedFrameAllocator;
use crate::arch::MMArch;
use crate::filesystem::vfs::file::PageCache;
use crate::filesystem::vfs::{FilePrivateData, FileSystem, IndexNode};
use crate::include::bindings::linux_bpf::{
    perf_event_header, perf_event_mmap_page, perf_event_type,
};
use crate::libs::spinlock::{SpinLock, SpinLockGuard};
use crate::mm::allocator::page_frame::{FrameAllocator, PageFrameCount, PhysPageFrame};
use crate::mm::page::{page_manager_lock_irqsave, Page};
use crate::mm::{MemoryManagementArch, PhysAddr};
use crate::perf::util::{LostSamples, PerfProbeArgs, PerfSample, SampleHeader};
use alloc::string::String;
use alloc::sync::Arc;
use alloc::vec::Vec;
use core::any::Any;
use core::fmt::Debug;
use core::mem::size_of;
use system_error::SystemError;

const PAGE_SIZE: usize = MMArch::PAGE_SIZE;

#[derive(Debug)]
pub struct BpfPerfEvent {
    _args: PerfProbeArgs,
    data: SpinLock<BpfPerfEventData>,
}

#[derive(Debug)]
pub struct BpfPerfEventData {
    enabled: bool,
    mmap_page: RingPage,
    page_cache: Arc<PageCache>,
    offset: usize,
}

#[derive(Debug)]
pub struct RingPage {
    size: usize,
    ptr: usize,
    data_region_size: usize,
    lost: usize,
    phys_addr: PhysAddr,
}

impl RingPage {
    pub fn empty() -> Self {
        RingPage {
            ptr: 0,
            size: 0,
            data_region_size: 0,
            lost: 0,
            phys_addr: PhysAddr::new(0),
        }
    }

    pub fn new_init(start: usize, len: usize, phys_addr: PhysAddr) -> Self {
        Self::init(start as _, len, phys_addr)
    }

    fn init(ptr: *mut u8, size: usize, phys_addr: PhysAddr) -> Self {
        assert_eq!(size % PAGE_SIZE, 0);
        assert!(size / PAGE_SIZE >= 2);
        // The first page is filled with the perf_event_mmap_page control structure.
        unsafe {
            let perf_event_mmap_page = &mut *(ptr as *mut perf_event_mmap_page);
            perf_event_mmap_page.data_offset = PAGE_SIZE as u64;
            perf_event_mmap_page.data_size = (size - PAGE_SIZE) as u64;
            // User space reads sample/lost records starting at data_tail.
            perf_event_mmap_page.data_tail = 0;
            // The kernel writes sample/lost records starting at data_head.
            perf_event_mmap_page.data_head = 0;
            // The data region is used as a ring buffer.
        }
        RingPage {
            ptr: ptr as usize,
            size,
            data_region_size: size - PAGE_SIZE,
            lost: 0,
            phys_addr,
        }
    }
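
    // An illustrative sketch of the mapping that `init` sets up: page 0 holds
    // the perf_event_mmap_page control structure, and the remaining pages form
    // the ring that `write_event` fills (layout inferred from the field
    // assignments above).
    //
    //  +------------------------------+---------------------------------------+
    //  | page 0                       | pages 1..(size / PAGE_SIZE)           |
    //  | perf_event_mmap_page:        | data region, data_region_size bytes:  |
    //  |   data_offset = PAGE_SIZE    |   perf records written back-to-back,  |
    //  |   data_head / data_tail      |   wrapping at the end (ring buffer)   |
    //  +------------------------------+---------------------------------------+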

    fn can_write(&self, data_size: usize, data_tail: usize, data_head: usize) -> bool {
        if (data_head + 1) % self.data_region_size == data_tail {
            // The buffer is full.
            return false;
        }
        let capacity = if data_head >= data_tail {
            self.data_region_size - data_head + data_tail
        } else {
            data_tail - data_head
        };
        data_size <= capacity
    }

    pub fn write_event(&mut self, data: &[u8]) -> Result<()> {
        let data_tail = unsafe { &mut (*(self.ptr as *mut perf_event_mmap_page)).data_tail };
        let data_head = unsafe { &mut (*(self.ptr as *mut perf_event_mmap_page)).data_head };
        // data_tail..data_head holds records that user space has not consumed
        // yet, so data_head..data_tail is the writable region. Check whether
        // there is enough free space for this event.
        let sample_size = PerfSample::calculate_size(data.len());

        let can_write_sample =
            self.can_write(sample_size, *data_tail as usize, *data_head as usize);
        if !can_write_sample {
            // Not enough space: account for the dropped event in the lost counter.
            self.lost += 1;
            Ok(())
        } else {
            // There is enough space for the sample.
            // If the lost counter is non-zero, write a lost record first.
            let can_write_lost_record = self.can_write(
                size_of::<LostSamples>(),
                *data_tail as usize,
                *data_head as usize,
            );
            if self.lost > 0 && can_write_lost_record {
                let new_data_head = self.write_lost(*data_head as usize)?;
                *data_head = new_data_head as u64;
                self.lost = 0;
                self.write_event(data)
            } else {
                let new_data_head = self.write_sample(data, *data_head as usize)?;
                *data_head = new_data_head as u64;
                Ok(())
            }
        }
    }

    /// Write arbitrary bytes into the data region, wrapping at its end.
    ///
    /// Return the new data_head.
    fn write_any(&mut self, data: &[u8], data_head: usize) -> Result<usize> {
        let data_region_len = self.data_region_size;
        let data_region = &mut self.as_mut_slice()[PAGE_SIZE..];
        let data_len = data.len();
        let end = (data_head + data_len) % data_region_len;
        let start = data_head;
        if start < end {
            data_region[start..end].copy_from_slice(data);
        } else {
            // The write wraps around the end of the data region.
            let first_len = data_region_len - start;
            data_region[start..start + first_len].copy_from_slice(&data[..first_len]);
            data_region[0..end].copy_from_slice(&data[first_len..]);
        }
        Ok(end)
    }
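
    // For reference, the byte layout of the two record kinds emitted below, as
    // implied by the struct fields used in `write_sample` and `write_lost` (the
    // exact field widths live in `crate::perf::util` and the linux_bpf
    // bindings, so treat this as a sketch rather than a normative layout):
    //
    //   PERF_RECORD_SAMPLE: perf_event_header | u32 payload size | payload bytes
    //   PERF_RECORD_LOST:   perf_event_header | u64 id | u64 lost-count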

    /// Write a sample record to the page.
    ///
    /// Return the new data_head.
    fn write_sample(&mut self, data: &[u8], data_head: usize) -> Result<usize> {
        let perf_sample = PerfSample {
            s_hdr: SampleHeader {
                header: perf_event_header {
                    type_: perf_event_type::PERF_RECORD_SAMPLE as u32,
                    misc: 0,
                    size: size_of::<SampleHeader>() as u16 + data.len() as u16,
                },
                size: data.len() as u32,
            },
            value: data,
        };
        let new_head = self.write_any(perf_sample.s_hdr.as_bytes(), data_head)?;
        self.write_any(perf_sample.value, new_head)
    }

    /// Write a lost record to the page.
    ///
    /// Return the new data_head.
    fn write_lost(&mut self, data_head: usize) -> Result<usize> {
        let lost = LostSamples {
            header: perf_event_header {
                type_: perf_event_type::PERF_RECORD_LOST as u32,
                misc: 0,
                size: size_of::<LostSamples>() as u16,
            },
            id: 0,
            count: self.lost as u64,
        };
        self.write_any(lost.as_bytes(), data_head)
    }

    pub fn readable(&self) -> bool {
        let data_tail = unsafe { &(*(self.ptr as *mut perf_event_mmap_page)).data_tail };
        let data_head = unsafe { &(*(self.ptr as *mut perf_event_mmap_page)).data_head };
        data_tail != data_head
    }

    pub fn as_slice(&self) -> &[u8] {
        unsafe { core::slice::from_raw_parts(self.ptr as *const u8, self.size) }
    }

    pub fn as_mut_slice(&mut self) -> &mut [u8] {
        unsafe { core::slice::from_raw_parts_mut(self.ptr as *mut u8, self.size) }
    }
}
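
// A minimal usage sketch for RingPage on its own (illustrative, not part of
// the kernel API): any page-aligned buffer whose size is a multiple of
// PAGE_SIZE and spans at least two pages can back a ring, e.g. frames taken
// from the allocator the same way `do_mmap` below does it.
//
// let (paddr, _count) = unsafe { LockedFrameAllocator.allocate(PageFrameCount::new(2)) }
//     .ok_or(SystemError::ENOSPC)?;
// let vaddr = unsafe { MMArch::phys_2_virt(paddr) }.ok_or(SystemError::EFAULT)?;
// let mut ring = RingPage::new_init(vaddr.data(), 2 * PAGE_SIZE, paddr);
// ring.write_event(&[0u8; 8])?;  // one PERF_RECORD_SAMPLE record
// assert!(ring.readable());      // data_head has advanced past data_tail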

impl BpfPerfEvent {
    pub fn new(args: PerfProbeArgs) -> Self {
        BpfPerfEvent {
            _args: args,
            data: SpinLock::new(BpfPerfEventData {
                enabled: false,
                mmap_page: RingPage::empty(),
                page_cache: PageCache::new(None),
                offset: 0,
            }),
        }
    }

    pub fn do_mmap(&self, _start: usize, len: usize, offset: usize) -> Result<()> {
        let mut data = self.data.lock();
        // Allocate page frames for the ring buffer.
        let (phy_addr, page_count) =
            unsafe { LockedFrameAllocator.allocate(PageFrameCount::new(len / PAGE_SIZE)) }
                .ok_or(SystemError::ENOSPC)?;
        let mut page_manager_guard = page_manager_lock_irqsave();
        let mut cur_phys = PhysPageFrame::new(phy_addr);
        for i in 0..page_count.data() {
            let page = Arc::new(Page::new(true, cur_phys.phys_address()));
            let paddr = cur_phys.phys_address();
            page_manager_guard.insert(paddr, &page);
            data.page_cache.add_page(i, &page);
            cur_phys = cur_phys.next();
        }
        let virt_addr = unsafe { MMArch::phys_2_virt(phy_addr) }.ok_or(SystemError::EFAULT)?;
        // Initialize the mmap control page and the ring buffer behind it.
        let mmap_page = RingPage::new_init(virt_addr.data(), len, phy_addr);
        data.mmap_page = mmap_page;
        data.offset = offset;
        Ok(())
    }

    pub fn write_event(&self, data: &[u8]) -> Result<()> {
        let mut inner_data = self.data.lock();
        inner_data.mmap_page.write_event(data)?;
        Ok(())
    }
}

impl Drop for BpfPerfEvent {
    fn drop(&mut self) {
        let mut page_manager_guard = page_manager_lock_irqsave();
        let data = self.data.lock();
        let phy_addr = data.mmap_page.phys_addr;
        let len = data.mmap_page.size;
        let page_count = PageFrameCount::new(len / PAGE_SIZE);
        let mut cur_phys = PhysPageFrame::new(phy_addr);
        for _ in 0..page_count.data() {
            page_manager_guard.remove_page(&cur_phys.phys_address());
            cur_phys = cur_phys.next();
        }
    }
}

impl IndexNode for BpfPerfEvent {
    fn mmap(&self, start: usize, len: usize, offset: usize) -> Result<()> {
        self.do_mmap(start, len, offset)
    }

    fn read_at(
        &self,
        _offset: usize,
        _len: usize,
        _buf: &mut [u8],
        _data: SpinLockGuard<FilePrivateData>,
    ) -> Result<usize> {
        panic!("PerfEventInode does not support read")
    }

    fn write_at(
        &self,
        _offset: usize,
        _len: usize,
        _buf: &[u8],
        _data: SpinLockGuard<FilePrivateData>,
    ) -> Result<usize> {
        panic!("PerfEventInode does not support write")
    }

    fn fs(&self) -> Arc<dyn FileSystem> {
        panic!("PerfEventInode does not have a filesystem")
    }

    fn as_any_ref(&self) -> &dyn Any {
        self
    }

    fn list(&self) -> Result<Vec<String>> {
        Err(SystemError::ENOSYS)
    }

    fn page_cache(&self) -> Option<Arc<PageCache>> {
        Some(self.data.lock().page_cache.clone())
    }
}

impl PerfEventOps for BpfPerfEvent {
    fn enable(&self) -> Result<()> {
        self.data.lock().enabled = true;
        Ok(())
    }

    fn disable(&self) -> Result<()> {
        self.data.lock().enabled = false;
        Ok(())
    }

    fn readable(&self) -> bool {
        self.data.lock().mmap_page.readable()
    }
}

pub fn perf_event_open_bpf(args: PerfProbeArgs) -> BpfPerfEvent {
    BpfPerfEvent::new(args)
}
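
// End-to-end usage sketch (illustrative; assumes `args` is a PerfProbeArgs
// built by the perf_event_open syscall path, which lives outside this file):
//
// let event = perf_event_open_bpf(args);
// event.do_mmap(0, 4 * PAGE_SIZE, 0)?;  // 1 control page + 3 data pages
// event.enable()?;
// event.write_event(&payload)?;         // kernel side pushes a sample
// // User space mmaps the same pages, polls readable(), and consumes records
// // between data_tail and data_head, advancing data_tail as it reads.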