xref: /DragonOS/kernel/src/bpf/map/mod.rs (revision fae6e9ade46a52976ad5d099643d51cc20876448)
1 mod array_map;
2 mod hash_map;
3 mod lru;
4 mod queue;
5 mod util;
6 
7 use super::Result;
8 use crate::bpf::map::array_map::{ArrayMap, PerCpuArrayMap, PerfEventArrayMap};
9 use crate::bpf::map::hash_map::PerCpuHashMap;
10 use crate::bpf::map::util::{BpfMapGetNextKeyArg, BpfMapMeta, BpfMapUpdateArg};
11 use crate::filesystem::vfs::file::{File, FileMode};
12 use crate::filesystem::vfs::syscall::ModeType;
13 use crate::filesystem::vfs::{FilePrivateData, FileSystem, FileType, IndexNode, Metadata};
14 use crate::include::bindings::linux_bpf::{bpf_attr, bpf_map_type};
15 use crate::libs::casting::DowncastArc;
16 use crate::libs::spinlock::{SpinLock, SpinLockGuard};
17 use crate::process::ProcessManager;
18 use crate::syscall::user_access::{UserBufferReader, UserBufferWriter};
19 use alloc::boxed::Box;
20 use alloc::string::String;
21 use alloc::sync::Arc;
22 use alloc::vec::Vec;
23 use core::any::Any;
24 use core::fmt::Debug;
25 use intertrait::CastFromSync;
26 use log::{error, info};
27 use system_error::SystemError;
28 
29 #[derive(Debug)]
30 pub struct BpfMap {
31     inner_map: SpinLock<Box<dyn BpfMapCommonOps>>,
32     meta: BpfMapMeta,
33 }
34 
/// Callback signature accepted by `BpfMapCommonOps::for_each_elem`.
///
/// The callback receives a key, a value and the opaque context pointer that was
/// passed to `for_each_elem`.
///
/// NOTE(review): the meaning of the returned `i32` is decided by the individual
/// map implementations and is not visible in this file — confirm before relying on it.
pub type BpfCallBackFn = fn(key: &[u8], value: &[u8], ctx: *const u8) -> i32;
36 
37 pub trait BpfMapCommonOps: Send + Sync + Debug + CastFromSync {
38     /// Lookup an element in the map.
39     ///
40     /// See https://ebpf-docs.dylanreimerink.nl/linux/helper-function/bpf_map_lookup_elem/
lookup_elem(&mut self, _key: &[u8]) -> Result<Option<&[u8]>>41     fn lookup_elem(&mut self, _key: &[u8]) -> Result<Option<&[u8]>> {
42         Err(SystemError::ENOSYS)
43     }
44     /// Update an element in the map.
45     ///
46     /// See https://ebpf-docs.dylanreimerink.nl/linux/helper-function/bpf_map_update_elem/
update_elem(&mut self, _key: &[u8], _value: &[u8], _flags: u64) -> Result<()>47     fn update_elem(&mut self, _key: &[u8], _value: &[u8], _flags: u64) -> Result<()> {
48         Err(SystemError::ENOSYS)
49     }
50     /// Delete an element from the map.
51     ///
52     /// See https://ebpf-docs.dylanreimerink.nl/linux/helper-function/bpf_map_delete_elem/
delete_elem(&mut self, _key: &[u8]) -> Result<()>53     fn delete_elem(&mut self, _key: &[u8]) -> Result<()> {
54         Err(SystemError::ENOSYS)
55     }
56     /// For each element in map, call callback_fn function with map,
57     /// callback_ctx and other map-specific parameters.
58     ///
59     /// See https://ebpf-docs.dylanreimerink.nl/linux/helper-function/bpf_for_each_map_elem/
for_each_elem(&mut self, _cb: BpfCallBackFn, _ctx: *const u8, _flags: u64) -> Result<u32>60     fn for_each_elem(&mut self, _cb: BpfCallBackFn, _ctx: *const u8, _flags: u64) -> Result<u32> {
61         Err(SystemError::ENOSYS)
62     }
63     /// Look up an element with the given key in the map referred to by the file descriptor fd,
64     /// and if found, delete the element.
lookup_and_delete_elem(&mut self, _key: &[u8], _value: &mut [u8]) -> Result<()>65     fn lookup_and_delete_elem(&mut self, _key: &[u8], _value: &mut [u8]) -> Result<()> {
66         Err(SystemError::ENOSYS)
67     }
68 
69     /// erform a lookup in percpu map for an entry associated to key on cpu.
lookup_percpu_elem(&mut self, _key: &[u8], _cpu: u32) -> Result<Option<&[u8]>>70     fn lookup_percpu_elem(&mut self, _key: &[u8], _cpu: u32) -> Result<Option<&[u8]>> {
71         Err(SystemError::ENOSYS)
72     }
73     /// Get the next key in the map. If key is None, get the first key.
74     ///
75     /// Called from syscall
get_next_key(&self, _key: Option<&[u8]>, _next_key: &mut [u8]) -> Result<()>76     fn get_next_key(&self, _key: Option<&[u8]>, _next_key: &mut [u8]) -> Result<()> {
77         Err(SystemError::ENOSYS)
78     }
79 
80     /// Push an element value in map.
push_elem(&mut self, _value: &[u8], _flags: u64) -> Result<()>81     fn push_elem(&mut self, _value: &[u8], _flags: u64) -> Result<()> {
82         Err(SystemError::ENOSYS)
83     }
84 
85     /// Pop an element value from map.
pop_elem(&mut self, _value: &mut [u8]) -> Result<()>86     fn pop_elem(&mut self, _value: &mut [u8]) -> Result<()> {
87         Err(SystemError::ENOSYS)
88     }
89 
90     /// Peek an element value from map.
peek_elem(&self, _value: &mut [u8]) -> Result<()>91     fn peek_elem(&self, _value: &mut [u8]) -> Result<()> {
92         Err(SystemError::ENOSYS)
93     }
94 
95     /// Freeze the map.
96     ///
97     /// It's useful for .rodata maps.
freeze(&self) -> Result<()>98     fn freeze(&self) -> Result<()> {
99         Err(SystemError::ENOSYS)
100     }
101 
102     /// Get the first value pointer.
first_value_ptr(&self) -> Result<*const u8>103     fn first_value_ptr(&self) -> Result<*const u8> {
104         Err(SystemError::ENOSYS)
105     }
106 }
107 impl DowncastArc for dyn BpfMapCommonOps {
as_any_arc(self: Arc<Self>) -> Arc<dyn Any>108     fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any> {
109         self
110     }
111 }
112 impl BpfMap {
new(map: Box<dyn BpfMapCommonOps>, meta: BpfMapMeta) -> Self113     pub fn new(map: Box<dyn BpfMapCommonOps>, meta: BpfMapMeta) -> Self {
114         assert_ne!(meta.key_size, 0);
115         BpfMap {
116             inner_map: SpinLock::new(map),
117             meta,
118         }
119     }
120 
inner_map(&self) -> &SpinLock<Box<dyn BpfMapCommonOps>>121     pub fn inner_map(&self) -> &SpinLock<Box<dyn BpfMapCommonOps>> {
122         &self.inner_map
123     }
124 
key_size(&self) -> usize125     pub fn key_size(&self) -> usize {
126         self.meta.key_size as usize
127     }
128 
value_size(&self) -> usize129     pub fn value_size(&self) -> usize {
130         self.meta.value_size as usize
131     }
132 }
133 
134 impl IndexNode for BpfMap {
open(&self, _data: SpinLockGuard<FilePrivateData>, _mode: &FileMode) -> Result<()>135     fn open(&self, _data: SpinLockGuard<FilePrivateData>, _mode: &FileMode) -> Result<()> {
136         Ok(())
137     }
close(&self, _data: SpinLockGuard<FilePrivateData>) -> Result<()>138     fn close(&self, _data: SpinLockGuard<FilePrivateData>) -> Result<()> {
139         Ok(())
140     }
read_at( &self, _offset: usize, _len: usize, _buf: &mut [u8], _data: SpinLockGuard<FilePrivateData>, ) -> Result<usize>141     fn read_at(
142         &self,
143         _offset: usize,
144         _len: usize,
145         _buf: &mut [u8],
146         _data: SpinLockGuard<FilePrivateData>,
147     ) -> Result<usize> {
148         Err(SystemError::ENOSYS)
149     }
150 
write_at( &self, _offset: usize, _len: usize, _buf: &[u8], _data: SpinLockGuard<FilePrivateData>, ) -> Result<usize>151     fn write_at(
152         &self,
153         _offset: usize,
154         _len: usize,
155         _buf: &[u8],
156         _data: SpinLockGuard<FilePrivateData>,
157     ) -> Result<usize> {
158         Err(SystemError::ENOSYS)
159     }
160 
metadata(&self) -> Result<Metadata>161     fn metadata(&self) -> Result<Metadata> {
162         let meta = Metadata {
163             mode: ModeType::from_bits_truncate(0o755),
164             file_type: FileType::File,
165             ..Default::default()
166         };
167         Ok(meta)
168     }
169 
resize(&self, _len: usize) -> Result<()>170     fn resize(&self, _len: usize) -> Result<()> {
171         Ok(())
172     }
173 
fs(&self) -> Arc<dyn FileSystem>174     fn fs(&self) -> Arc<dyn FileSystem> {
175         todo!("BpfMap does not have a filesystem")
176     }
177 
as_any_ref(&self) -> &dyn Any178     fn as_any_ref(&self) -> &dyn Any {
179         self
180     }
181 
list(&self) -> Result<Vec<String>>182     fn list(&self) -> Result<Vec<String>> {
183         Err(SystemError::ENOSYS)
184     }
185 }
186 
187 /// Create a map and return a file descriptor that refers to
188 /// the map.  The close-on-exec file descriptor flag
189 /// is automatically enabled for the new file descriptor.
190 ///
191 /// See https://ebpf-docs.dylanreimerink.nl/linux/syscall/BPF_MAP_CREATE/
bpf_map_create(attr: &bpf_attr) -> Result<usize>192 pub fn bpf_map_create(attr: &bpf_attr) -> Result<usize> {
193     let map_meta = BpfMapMeta::try_from(attr)?;
194     info!("The map attr is {:#?}", map_meta);
195     let map: Box<dyn BpfMapCommonOps> = match map_meta.map_type {
196         bpf_map_type::BPF_MAP_TYPE_ARRAY => {
197             let array_map = ArrayMap::new(&map_meta)?;
198             Box::new(array_map)
199         }
200         bpf_map_type::BPF_MAP_TYPE_PERCPU_ARRAY => {
201             let per_cpu_array_map = PerCpuArrayMap::new(&map_meta)?;
202             Box::new(per_cpu_array_map)
203         }
204         bpf_map_type::BPF_MAP_TYPE_PERF_EVENT_ARRAY => {
205             let perf_event_array_map = PerfEventArrayMap::new(&map_meta)?;
206             Box::new(perf_event_array_map)
207         }
208 
209         bpf_map_type::BPF_MAP_TYPE_CPUMAP
210         | bpf_map_type::BPF_MAP_TYPE_DEVMAP
211         | bpf_map_type::BPF_MAP_TYPE_DEVMAP_HASH => {
212             error!("bpf map type {:?} not implemented", map_meta.map_type);
213             Err(SystemError::EINVAL)?
214         }
215         bpf_map_type::BPF_MAP_TYPE_HASH => {
216             let hash_map = hash_map::BpfHashMap::new(&map_meta)?;
217             Box::new(hash_map)
218         }
219         bpf_map_type::BPF_MAP_TYPE_PERCPU_HASH => {
220             let per_cpu_hash_map = PerCpuHashMap::new(&map_meta)?;
221             Box::new(per_cpu_hash_map)
222         }
223         bpf_map_type::BPF_MAP_TYPE_QUEUE => {
224             let queue_map = queue::QueueMap::new(&map_meta)?;
225             Box::new(queue_map)
226         }
227         bpf_map_type::BPF_MAP_TYPE_STACK => {
228             let stack_map = queue::StackMap::new(&map_meta)?;
229             Box::new(stack_map)
230         }
231         bpf_map_type::BPF_MAP_TYPE_LRU_HASH => {
232             let lru_hash_map = lru::LruMap::new(&map_meta)?;
233             Box::new(lru_hash_map)
234         }
235         bpf_map_type::BPF_MAP_TYPE_LRU_PERCPU_HASH => {
236             let lru_per_cpu_hash_map = lru::PerCpuLruMap::new(&map_meta)?;
237             Box::new(lru_per_cpu_hash_map)
238         }
239         _ => {
240             unimplemented!("bpf map type {:?} not implemented", map_meta.map_type)
241         }
242     };
243     let bpf_map = BpfMap::new(map, map_meta);
244     let fd_table = ProcessManager::current_pcb().fd_table();
245     let file = File::new(Arc::new(bpf_map), FileMode::O_RDWR | FileMode::O_CLOEXEC)?;
246     let fd = fd_table.write().alloc_fd(file, None).map(|x| x as usize)?;
247     info!("create map with fd: [{}]", fd);
248     Ok(fd)
249 }
250 
251 /// Create or update an element (key/value pair) in a specified map.
252 ///
253 /// See https://ebpf-docs.dylanreimerink.nl/linux/syscall/BPF_MAP_UPDATE_ELEM/
bpf_map_update_elem(attr: &bpf_attr) -> Result<usize>254 pub fn bpf_map_update_elem(attr: &bpf_attr) -> Result<usize> {
255     let arg = BpfMapUpdateArg::from(attr);
256     info!("<bpf_map_update_elem>: {:#x?}", arg);
257     let map = get_map_file(arg.map_fd as i32)?;
258     let meta = &map.meta;
259     let key_size = meta.key_size as usize;
260     let value_size = meta.value_size as usize;
261 
262     let key_buf = UserBufferReader::new(arg.key as *const u8, key_size, true)?;
263     let value_buf = UserBufferReader::new(arg.value as *const u8, value_size, true)?;
264 
265     let key = key_buf.read_from_user(0)?;
266     let value = value_buf.read_from_user(0)?;
267     map.inner_map.lock().update_elem(key, value, arg.flags)?;
268     info!("bpf_map_update_elem ok");
269     Ok(0)
270 }
271 
bpf_map_freeze(attr: &bpf_attr) -> Result<usize>272 pub fn bpf_map_freeze(attr: &bpf_attr) -> Result<usize> {
273     let arg = BpfMapUpdateArg::from(attr);
274     let map_fd = arg.map_fd;
275     info!("<bpf_map_freeze>: map_fd: {:}", map_fd);
276     let map = get_map_file(map_fd as i32)?;
277     map.inner_map.lock().freeze()?;
278     Ok(0)
279 }
280 
281 ///  Look up an element by key in a specified map and return its value.
282 ///
283 /// See https://ebpf-docs.dylanreimerink.nl/linux/syscall/BPF_MAP_LOOKUP_ELEM/
bpf_lookup_elem(attr: &bpf_attr) -> Result<usize>284 pub fn bpf_lookup_elem(attr: &bpf_attr) -> Result<usize> {
285     let arg = BpfMapUpdateArg::from(attr);
286     // info!("<bpf_lookup_elem>: {:#x?}", arg);
287     let map = get_map_file(arg.map_fd as _)?;
288     let meta = &map.meta;
289     let key_size = meta.key_size as usize;
290     let value_size = meta.value_size as usize;
291 
292     let key_buf = UserBufferReader::new(arg.key as *const u8, key_size, true)?;
293     let mut value_buf = UserBufferWriter::new(arg.value as *mut u8, value_size, true)?;
294 
295     let key = key_buf.read_from_user(0)?;
296 
297     let mut inner = map.inner_map.lock();
298     let r_value = inner.lookup_elem(key)?;
299     if let Some(r_value) = r_value {
300         value_buf.copy_to_user(r_value, 0)?;
301         Ok(0)
302     } else {
303         Err(SystemError::ENOENT)
304     }
305 }
306 /// Look up an element by key in a specified map and return the key of the next element.
307 ///
308 /// - If key is `None`, the operation returns zero and sets the next_key pointer to the key of the first element.
309 /// - If key is `Some(T)`, the operation returns zero and sets the next_key pointer to the key of the next element.
310 /// - If key is the last element, returns -1 and errno is set to ENOENT.
311 ///
312 /// See https://ebpf-docs.dylanreimerink.nl/linux/syscall/BPF_MAP_GET_NEXT_KEY/
bpf_map_get_next_key(attr: &bpf_attr) -> Result<usize>313 pub fn bpf_map_get_next_key(attr: &bpf_attr) -> Result<usize> {
314     let arg = BpfMapGetNextKeyArg::from(attr);
315     // info!("<bpf_map_get_next_key>: {:#x?}", arg);
316     let map = get_map_file(arg.map_fd as i32)?;
317     let meta = &map.meta;
318     let key_size = meta.key_size as usize;
319 
320     let key = if let Some(key_ptr) = arg.key {
321         let key_buf = UserBufferReader::new(key_ptr as *const u8, key_size, true)?;
322         let key = key_buf.read_from_user(0)?.to_vec();
323         Some(key)
324     } else {
325         None
326     };
327     let key = key.as_deref();
328     let mut next_key_buf = UserBufferWriter::new(arg.next_key as *mut u8, key_size, true)?;
329     let inner = map.inner_map.lock();
330     let next_key = next_key_buf.buffer(0)?;
331     inner.get_next_key(key, next_key)?;
332     // info!("next_key: {:?}", next_key);
333     Ok(0)
334 }
335 
336 /// Look up and delete an element by key in a specified map.
337 ///
338 /// # WARN
339 ///
340 /// Not all map types (particularly array maps) support this operation,
341 /// instead a zero value can be written to the map value. Check the map types page to check for support.
342 ///
343 /// See https://ebpf-docs.dylanreimerink.nl/linux/syscall/BPF_MAP_DELETE_ELEM/
bpf_map_delete_elem(attr: &bpf_attr) -> Result<usize>344 pub fn bpf_map_delete_elem(attr: &bpf_attr) -> Result<usize> {
345     let arg = BpfMapUpdateArg::from(attr);
346     // info!("<bpf_map_delete_elem>: {:#x?}", arg);
347     let map = get_map_file(arg.map_fd as i32)?;
348     let meta = &map.meta;
349     let key_size = meta.key_size as usize;
350 
351     let key_buf = UserBufferReader::new(arg.key as *const u8, key_size, true)?;
352     let key = key_buf.read_from_user(0)?;
353     map.inner_map.lock().delete_elem(key)?;
354     Ok(0)
355 }
356 
357 /// Iterate and fetch multiple elements in a map.
358 ///
359 /// See https://ebpf-docs.dylanreimerink.nl/linux/syscall/BPF_MAP_LOOKUP_BATCH/
bpf_map_lookup_batch(_attr: &bpf_attr) -> Result<usize>360 pub fn bpf_map_lookup_batch(_attr: &bpf_attr) -> Result<usize> {
361     todo!()
362 }
363 
364 /// Look up an element with the given key in the map referred to by the file descriptor fd,
365 /// and if found, delete the element.
366 ///
367 /// For BPF_MAP_TYPE_QUEUE and BPF_MAP_TYPE_STACK map types, the flags argument needs to be set to 0,
368 /// but for other map types, it may be specified as:
369 /// - BPF_F_LOCK : If this flag is set, the command will acquire the spin-lock of the map value we are looking up.
370 ///
371 /// If the map contains no spin-lock in its value, -EINVAL will be returned by the command.
372 ///
373 /// The BPF_MAP_TYPE_QUEUE and BPF_MAP_TYPE_STACK map types implement this command as a “pop” operation,
374 /// deleting the top element rather than one corresponding to key.
375 /// The key and key_len parameters should be zeroed when issuing this operation for these map types.
376 ///
377 /// This command is only valid for the following map types:
378 /// - BPF_MAP_TYPE_QUEUE
379 /// - BPF_MAP_TYPE_STACK
380 /// - BPF_MAP_TYPE_HASH
381 /// - BPF_MAP_TYPE_PERCPU_HASH
382 /// - BPF_MAP_TYPE_LRU_HASH
383 /// - BPF_MAP_TYPE_LRU_PERCPU_HASH
384 ///
385 ///
386 /// See https://ebpf-docs.dylanreimerink.nl/linux/syscall/BPF_MAP_LOOKUP_AND_DELETE_ELEM/
bpf_map_lookup_and_delete_elem(attr: &bpf_attr) -> Result<usize>387 pub fn bpf_map_lookup_and_delete_elem(attr: &bpf_attr) -> Result<usize> {
388     let arg = BpfMapUpdateArg::from(attr);
389     // info!("<bpf_map_lookup_and_delete_elem>: {:#x?}", arg);
390     let map = get_map_file(arg.map_fd as i32)?;
391     let meta = &map.meta;
392     let key_size = meta.key_size as usize;
393     let value_size = meta.value_size as usize;
394 
395     let key_buf = UserBufferReader::new(arg.key as *const u8, key_size, true)?;
396     let mut value_buf = UserBufferWriter::new(arg.value as *mut u8, value_size, true)?;
397 
398     let value = value_buf.buffer(0)?;
399     let key = key_buf.read_from_user(0)?;
400     let mut inner = map.inner_map.lock();
401     inner.lookup_and_delete_elem(key, value)?;
402     Ok(0)
403 }
404 
get_map_file(fd: i32) -> Result<Arc<BpfMap>>405 fn get_map_file(fd: i32) -> Result<Arc<BpfMap>> {
406     let fd_table = ProcessManager::current_pcb().fd_table();
407     let map = fd_table
408         .read()
409         .get_file_by_fd(fd)
410         .ok_or(SystemError::EBADF)?;
411     let map = map
412         .inode()
413         .downcast_arc::<BpfMap>()
414         .ok_or(SystemError::EINVAL)?;
415     Ok(map)
416 }
417