xref: /DragonOS/kernel/crates/rbpf/src/asm_parser.rs (revision fae6e9ade46a52976ad5d099643d51cc20876448)
1 // SPDX-License-Identifier: (Apache-2.0 OR MIT)
2 // Copyright 2017 Rich Lane <lanerl@gmail.com>
3 
4 // Rust-doc comments were left in the module, but it is no longer publicly exposed from the root
5 // file of the crate. Do not expect to find those comments in the documentation of the crate.
6 
7 //! This module parses eBPF assembly language source code.
8 
9 use alloc::{
10     string::{String, ToString},
11     vec::Vec,
12 };
13 
14 #[cfg(feature = "std")]
15 use combine::EasyParser;
16 use combine::{
17     attempt, between, eof, many, many1, one_of, optional,
18     parser::char::{alpha_num, char, digit, hex_digit, spaces, string},
19     sep_by,
20     stream::position::{self},
21     ParseError, Parser, Stream,
22 };
23 
/// A single operand as written in the assembly source.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum Operand {
    /// A register, identified by its number.
    Register(i64),
    /// An integer: either a jump offset or an immediate value.
    Integer(i64),
    /// A memory reference: register number first, then offset.
    Memory(i64, i64),
    /// Placeholder variant, used for pattern matching.
    Nil,
}
36 
37 /// Parsed instruction.
38 #[derive(Debug, PartialEq, Eq)]
39 pub struct Instruction {
40     /// Instruction name.
41     pub name: String,
42     /// Operands.
43     pub operands: Vec<Operand>,
44 }
45 
ident<I>() -> impl Parser<I, Output = String> where I: Stream<Token = char>, I::Error: ParseError<I::Token, I::Range, I::Position>,46 fn ident<I>() -> impl Parser<I, Output = String>
47 where
48     I: Stream<Token = char>,
49     I::Error: ParseError<I::Token, I::Range, I::Position>,
50 {
51     many1(alpha_num())
52 }
53 
integer<I>() -> impl Parser<I, Output = i64> where I: Stream<Token = char>, I::Error: ParseError<I::Token, I::Range, I::Position>,54 fn integer<I>() -> impl Parser<I, Output = i64>
55 where
56     I: Stream<Token = char>,
57     I::Error: ParseError<I::Token, I::Range, I::Position>,
58 {
59     let sign = optional(one_of("-+".chars())).map(|x| match x {
60         Some('-') => -1,
61         _ => 1,
62     });
63     let hex = string("0x")
64         .with(many1(hex_digit()))
65         .map(|x: String| u64::from_str_radix(&x, 16).unwrap() as i64);
66     let dec = many1(digit()).map(|x: String| x.parse::<i64>().unwrap());
67     (sign, attempt(hex).or(dec)).map(|(s, x)| s * x)
68 }
69 
register<I>() -> impl Parser<I, Output = i64> where I: Stream<Token = char>, I::Error: ParseError<I::Token, I::Range, I::Position>,70 fn register<I>() -> impl Parser<I, Output = i64>
71 where
72     I: Stream<Token = char>,
73     I::Error: ParseError<I::Token, I::Range, I::Position>,
74 {
75     char('r')
76         .with(many1(digit()))
77         .map(|x: String| x.parse::<i64>().unwrap())
78 }
79 
operand<I>() -> impl Parser<I, Output = Operand> where I: Stream<Token = char>, I::Error: ParseError<I::Token, I::Range, I::Position>,80 fn operand<I>() -> impl Parser<I, Output = Operand>
81 where
82     I: Stream<Token = char>,
83     I::Error: ParseError<I::Token, I::Range, I::Position>,
84 {
85     let register_operand = register().map(Operand::Register);
86     let immediate = integer().map(Operand::Integer);
87     let memory = between(char('['), char(']'), (register(), optional(integer())))
88         .map(|t| Operand::Memory(t.0, t.1.unwrap_or(0)));
89     register_operand.or(immediate).or(memory)
90 }
91 
instruction<I>() -> impl Parser<I, Output = Instruction> where I: Stream<Token = char>, I::Error: ParseError<I::Token, I::Range, I::Position>,92 fn instruction<I>() -> impl Parser<I, Output = Instruction>
93 where
94     I: Stream<Token = char>,
95     I::Error: ParseError<I::Token, I::Range, I::Position>,
96 {
97     let operands = sep_by(operand(), char(',').skip(spaces()));
98     (ident().skip(spaces()), operands, spaces()).map(|t| Instruction {
99         name: t.0,
100         operands: t.1,
101     })
102 }
103 
104 /// Parse a string into a list of instructions.
105 ///
106 /// The instructions are not validated and may have invalid names and operand types.
parse(input: &str) -> Result<Vec<Instruction>, String>107 pub fn parse(input: &str) -> Result<Vec<Instruction>, String> {
108     let mut with = spaces().with(many(instruction()).skip(eof()));
109 
110     #[cfg(feature = "std")]
111     {
112         match with.easy_parse(position::Stream::new(input)) {
113             Ok((insts, _)) => Ok(insts),
114             Err(err) => Err(err.to_string()),
115         }
116     }
117     #[cfg(not(feature = "std"))]
118     {
119         match with.parse(position::Stream::new(input)) {
120             Ok((insts, _)) => Ok(insts),
121             Err(err) => Err(err.to_string()),
122         }
123     }
124 }
125 
126 #[cfg(test)]
127 mod tests {
128     use alloc::{string::ToString, vec};
129 
130     use combine::Parser;
131 
132     use super::{ident, instruction, integer, operand, parse, register, Instruction, Operand};
133 
134     // Unit tests for the different kinds of parsers.
135 
136     #[test]
test_ident()137     fn test_ident() {
138         assert_eq!(ident().parse("nop"), Ok(("nop".to_string(), "")));
139         assert_eq!(ident().parse("add32"), Ok(("add32".to_string(), "")));
140         assert_eq!(ident().parse("add32*"), Ok(("add32".to_string(), "*")));
141     }
142 
143     #[test]
test_integer()144     fn test_integer() {
145         assert_eq!(integer().parse("0"), Ok((0, "")));
146         assert_eq!(integer().parse("42"), Ok((42, "")));
147         assert_eq!(integer().parse("+42"), Ok((42, "")));
148         assert_eq!(integer().parse("-42"), Ok((-42, "")));
149         assert_eq!(integer().parse("0x0"), Ok((0, "")));
150         assert_eq!(
151             integer().parse("0x123456789abcdef0"),
152             Ok((0x123456789abcdef0, ""))
153         );
154         assert_eq!(integer().parse("-0x1f"), Ok((-31, "")));
155     }
156 
157     #[test]
test_register()158     fn test_register() {
159         assert_eq!(register().parse("r0"), Ok((0, "")));
160         assert_eq!(register().parse("r15"), Ok((15, "")));
161     }
162 
163     #[test]
test_operand()164     fn test_operand() {
165         assert_eq!(operand().parse("r0"), Ok((Operand::Register(0), "")));
166         assert_eq!(operand().parse("r15"), Ok((Operand::Register(15), "")));
167         assert_eq!(operand().parse("0"), Ok((Operand::Integer(0), "")));
168         assert_eq!(operand().parse("42"), Ok((Operand::Integer(42), "")));
169         assert_eq!(operand().parse("[r1]"), Ok((Operand::Memory(1, 0), "")));
170         assert_eq!(operand().parse("[r3+5]"), Ok((Operand::Memory(3, 5), "")));
171         assert_eq!(
172             operand().parse("[r3+0x1f]"),
173             Ok((Operand::Memory(3, 31), ""))
174         );
175         assert_eq!(
176             operand().parse("[r3-0x1f]"),
177             Ok((Operand::Memory(3, -31), ""))
178         );
179     }
180 
181     #[test]
test_instruction()182     fn test_instruction() {
183         assert_eq!(
184             instruction().parse("exit"),
185             Ok((
186                 Instruction {
187                     name: "exit".to_string(),
188                     operands: vec![],
189                 },
190                 ""
191             ))
192         );
193 
194         assert_eq!(
195             instruction().parse("call 2"),
196             Ok((
197                 Instruction {
198                     name: "call".to_string(),
199                     operands: vec![Operand::Integer(2)],
200                 },
201                 ""
202             ))
203         );
204 
205         assert_eq!(
206             instruction().parse("addi r1, 2"),
207             Ok((
208                 Instruction {
209                     name: "addi".to_string(),
210                     operands: vec![Operand::Register(1), Operand::Integer(2)],
211                 },
212                 ""
213             ))
214         );
215 
216         assert_eq!(
217             instruction().parse("ldxb r2, [r1+12]"),
218             Ok((
219                 Instruction {
220                     name: "ldxb".to_string(),
221                     operands: vec![Operand::Register(2), Operand::Memory(1, 12)],
222                 },
223                 ""
224             ))
225         );
226 
227         assert_eq!(
228             instruction().parse("lsh r3, 0x8"),
229             Ok((
230                 Instruction {
231                     name: "lsh".to_string(),
232                     operands: vec![Operand::Register(3), Operand::Integer(8)],
233                 },
234                 ""
235             ))
236         );
237 
238         assert_eq!(
239             instruction().parse("jne r3, 0x8, +37"),
240             Ok((
241                 Instruction {
242                     name: "jne".to_string(),
243                     operands: vec![
244                         Operand::Register(3),
245                         Operand::Integer(8),
246                         Operand::Integer(37)
247                     ],
248                 },
249                 ""
250             ))
251         );
252 
253         // Whitespace between operands is optional.
254         assert_eq!(
255             instruction().parse("jne r3,0x8,+37"),
256             Ok((
257                 Instruction {
258                     name: "jne".to_string(),
259                     operands: vec![
260                         Operand::Register(3),
261                         Operand::Integer(8),
262                         Operand::Integer(37)
263                     ],
264                 },
265                 ""
266             ))
267         );
268     }
269 
    // Other unit tests: try to parse various sets of instructions.
271 
272     #[test]
test_empty()273     fn test_empty() {
274         assert_eq!(parse(""), Ok(vec![]));
275     }
276 
277     #[test]
test_exit()278     fn test_exit() {
279         // No operands.
280         assert_eq!(
281             parse("exit"),
282             Ok(vec![Instruction {
283                 name: "exit".to_string(),
284                 operands: vec![],
285             }])
286         );
287     }
288 
289     #[test]
test_lsh()290     fn test_lsh() {
291         // Register and immediate operands.
292         assert_eq!(
293             parse("lsh r3, 0x20"),
294             Ok(vec![Instruction {
295                 name: "lsh".to_string(),
296                 operands: vec![Operand::Register(3), Operand::Integer(0x20)],
297             }])
298         );
299     }
300 
301     #[test]
test_ja()302     fn test_ja() {
303         // Jump offset operand.
304         assert_eq!(
305             parse("ja +1"),
306             Ok(vec![Instruction {
307                 name: "ja".to_string(),
308                 operands: vec![Operand::Integer(1)],
309             }])
310         );
311     }
312 
313     #[test]
test_ldxh()314     fn test_ldxh() {
315         // Register and memory operands.
316         assert_eq!(
317             parse("ldxh r4, [r1+12]"),
318             Ok(vec![Instruction {
319                 name: "ldxh".to_string(),
320                 operands: vec![Operand::Register(4), Operand::Memory(1, 12)],
321             }])
322         );
323     }
324 
325     #[test]
test_tcp_sack()326     fn test_tcp_sack() {
327         // Sample program from ubpf.
328         // We could technically indent the instructions since the parser support white spaces at
329         // the beginning, but there is another test for that.
330         let src = "\
331 ldxb r2, [r1+12]
332 ldxb r3, [r1+13]
333 lsh r3, 0x8
334 or r3, r2
335 mov r0, 0x0
336 jne r3, 0x8, +37
337 ldxb r2, [r1+23]
338 jne r2, 0x6, +35
339 ldxb r2, [r1+14]
340 add r1, 0xe
341 and r2, 0xf
342 lsh r2, 0x2
343 add r1, r2
344 mov r0, 0x0
345 ldxh r4, [r1+12]
346 add r1, 0x14
347 rsh r4, 0x2
348 and r4, 0x3c
349 mov r2, r4
350 add r2, 0xffffffec
351 mov r5, 0x15
352 mov r3, 0x0
353 jgt r5, r4, +20
354 mov r5, r3
355 lsh r5, 0x20
356 arsh r5, 0x20
357 mov r4, r1
358 add r4, r5
359 ldxb r5, [r4]
360 jeq r5, 0x1, +4
361 jeq r5, 0x0, +12
362 mov r6, r3
363 jeq r5, 0x5, +9
364 ja +2
365 add r3, 0x1
366 mov r6, r3
367 ldxb r3, [r4+1]
368 add r3, r6
369 lsh r3, 0x20
370 arsh r3, 0x20
371 jsgt r2, r3, -18
372 ja +1
373 mov r0, 0x1
374 exit
375 ";
376 
377         assert_eq!(
378             parse(src),
379             Ok(vec![
380                 Instruction {
381                     name: "ldxb".to_string(),
382                     operands: vec![Operand::Register(2), Operand::Memory(1, 12)],
383                 },
384                 Instruction {
385                     name: "ldxb".to_string(),
386                     operands: vec![Operand::Register(3), Operand::Memory(1, 13)],
387                 },
388                 Instruction {
389                     name: "lsh".to_string(),
390                     operands: vec![Operand::Register(3), Operand::Integer(8)],
391                 },
392                 Instruction {
393                     name: "or".to_string(),
394                     operands: vec![Operand::Register(3), Operand::Register(2)],
395                 },
396                 Instruction {
397                     name: "mov".to_string(),
398                     operands: vec![Operand::Register(0), Operand::Integer(0)],
399                 },
400                 Instruction {
401                     name: "jne".to_string(),
402                     operands: vec![
403                         Operand::Register(3),
404                         Operand::Integer(8),
405                         Operand::Integer(37)
406                     ],
407                 },
408                 Instruction {
409                     name: "ldxb".to_string(),
410                     operands: vec![Operand::Register(2), Operand::Memory(1, 23)],
411                 },
412                 Instruction {
413                     name: "jne".to_string(),
414                     operands: vec![
415                         Operand::Register(2),
416                         Operand::Integer(6),
417                         Operand::Integer(35)
418                     ],
419                 },
420                 Instruction {
421                     name: "ldxb".to_string(),
422                     operands: vec![Operand::Register(2), Operand::Memory(1, 14)],
423                 },
424                 Instruction {
425                     name: "add".to_string(),
426                     operands: vec![Operand::Register(1), Operand::Integer(14)],
427                 },
428                 Instruction {
429                     name: "and".to_string(),
430                     operands: vec![Operand::Register(2), Operand::Integer(15)],
431                 },
432                 Instruction {
433                     name: "lsh".to_string(),
434                     operands: vec![Operand::Register(2), Operand::Integer(2)],
435                 },
436                 Instruction {
437                     name: "add".to_string(),
438                     operands: vec![Operand::Register(1), Operand::Register(2)],
439                 },
440                 Instruction {
441                     name: "mov".to_string(),
442                     operands: vec![Operand::Register(0), Operand::Integer(0)],
443                 },
444                 Instruction {
445                     name: "ldxh".to_string(),
446                     operands: vec![Operand::Register(4), Operand::Memory(1, 12)],
447                 },
448                 Instruction {
449                     name: "add".to_string(),
450                     operands: vec![Operand::Register(1), Operand::Integer(20)],
451                 },
452                 Instruction {
453                     name: "rsh".to_string(),
454                     operands: vec![Operand::Register(4), Operand::Integer(2)],
455                 },
456                 Instruction {
457                     name: "and".to_string(),
458                     operands: vec![Operand::Register(4), Operand::Integer(60)],
459                 },
460                 Instruction {
461                     name: "mov".to_string(),
462                     operands: vec![Operand::Register(2), Operand::Register(4)],
463                 },
464                 Instruction {
465                     name: "add".to_string(),
466                     operands: vec![Operand::Register(2), Operand::Integer(4294967276)],
467                 },
468                 Instruction {
469                     name: "mov".to_string(),
470                     operands: vec![Operand::Register(5), Operand::Integer(21)],
471                 },
472                 Instruction {
473                     name: "mov".to_string(),
474                     operands: vec![Operand::Register(3), Operand::Integer(0)],
475                 },
476                 Instruction {
477                     name: "jgt".to_string(),
478                     operands: vec![
479                         Operand::Register(5),
480                         Operand::Register(4),
481                         Operand::Integer(20)
482                     ],
483                 },
484                 Instruction {
485                     name: "mov".to_string(),
486                     operands: vec![Operand::Register(5), Operand::Register(3)],
487                 },
488                 Instruction {
489                     name: "lsh".to_string(),
490                     operands: vec![Operand::Register(5), Operand::Integer(32)],
491                 },
492                 Instruction {
493                     name: "arsh".to_string(),
494                     operands: vec![Operand::Register(5), Operand::Integer(32)],
495                 },
496                 Instruction {
497                     name: "mov".to_string(),
498                     operands: vec![Operand::Register(4), Operand::Register(1)],
499                 },
500                 Instruction {
501                     name: "add".to_string(),
502                     operands: vec![Operand::Register(4), Operand::Register(5)],
503                 },
504                 Instruction {
505                     name: "ldxb".to_string(),
506                     operands: vec![Operand::Register(5), Operand::Memory(4, 0)],
507                 },
508                 Instruction {
509                     name: "jeq".to_string(),
510                     operands: vec![
511                         Operand::Register(5),
512                         Operand::Integer(1),
513                         Operand::Integer(4)
514                     ],
515                 },
516                 Instruction {
517                     name: "jeq".to_string(),
518                     operands: vec![
519                         Operand::Register(5),
520                         Operand::Integer(0),
521                         Operand::Integer(12)
522                     ],
523                 },
524                 Instruction {
525                     name: "mov".to_string(),
526                     operands: vec![Operand::Register(6), Operand::Register(3)],
527                 },
528                 Instruction {
529                     name: "jeq".to_string(),
530                     operands: vec![
531                         Operand::Register(5),
532                         Operand::Integer(5),
533                         Operand::Integer(9)
534                     ],
535                 },
536                 Instruction {
537                     name: "ja".to_string(),
538                     operands: vec![Operand::Integer(2)],
539                 },
540                 Instruction {
541                     name: "add".to_string(),
542                     operands: vec![Operand::Register(3), Operand::Integer(1)],
543                 },
544                 Instruction {
545                     name: "mov".to_string(),
546                     operands: vec![Operand::Register(6), Operand::Register(3)],
547                 },
548                 Instruction {
549                     name: "ldxb".to_string(),
550                     operands: vec![Operand::Register(3), Operand::Memory(4, 1)],
551                 },
552                 Instruction {
553                     name: "add".to_string(),
554                     operands: vec![Operand::Register(3), Operand::Register(6)],
555                 },
556                 Instruction {
557                     name: "lsh".to_string(),
558                     operands: vec![Operand::Register(3), Operand::Integer(32)],
559                 },
560                 Instruction {
561                     name: "arsh".to_string(),
562                     operands: vec![Operand::Register(3), Operand::Integer(32)],
563                 },
564                 Instruction {
565                     name: "jsgt".to_string(),
566                     operands: vec![
567                         Operand::Register(2),
568                         Operand::Register(3),
569                         Operand::Integer(-18)
570                     ],
571                 },
572                 Instruction {
573                     name: "ja".to_string(),
574                     operands: vec![Operand::Integer(1)],
575                 },
576                 Instruction {
577                     name: "mov".to_string(),
578                     operands: vec![Operand::Register(0), Operand::Integer(1)],
579                 },
580                 Instruction {
581                     name: "exit".to_string(),
582                     operands: vec![],
583                 }
584             ])
585         );
586     }
587 
588     /// When running without `std` the `EasyParser` provided by `combine`
589     /// cannot be used. Because of this we need to use the `Parser` and the
590     /// error messages are different.
591     #[test]
test_error_eof()592     fn test_error_eof() {
593         let expected_error;
594         #[cfg(feature = "std")]
595         {
596             expected_error = Err(
597                 "Parse error at line: 1, column: 6\nUnexpected end of input\nExpected digit\n"
598                     .to_string(),
599             );
600         }
601         #[cfg(not(feature = "std"))]
602         {
603             expected_error = Err("unexpected parse".to_string());
604         }
605         // Unexpected end of input in a register name.
606         assert_eq!(parse("lsh r"), expected_error);
607     }
608 
609     /// When running without `std` the `EasyParser` provided by `combine`
610     /// cannot be used. Because of this we need to use the `Parser` and the
611     /// error messages are different.
612     #[test]
test_error_unexpected_character()613     fn test_error_unexpected_character() {
614         let expected_error;
615         #[cfg(feature = "std")]
616         {
617             expected_error = Err(
618                 "Parse error at line: 2, column: 1\nUnexpected `^`\nExpected letter or digit, whitespaces, `r`, `-`, `+`, `[` or end of input\n".to_string()
619             );
620         }
621         #[cfg(not(feature = "std"))]
622         {
623             expected_error = Err("unexpected parse".to_string());
624         }
625         // Unexpected character at end of input.
626         assert_eq!(parse("exit\n^"), expected_error);
627     }
628 
629     #[test]
test_initial_whitespace()630     fn test_initial_whitespace() {
631         assert_eq!(
632             parse(
633                 "
634                           exit"
635             ),
636             Ok(vec![Instruction {
637                 name: "exit".to_string(),
638                 operands: vec![],
639             }])
640         );
641     }
642 }
643