1 // SPDX-License-Identifier: (Apache-2.0 OR MIT)
2 // Copyright 2017 Rich Lane <lanerl@gmail.com>
3
4 // Rust-doc comments were left in the module, but it is no longer publicly exposed from the root
5 // file of the crate. Do not expect to find those comments in the documentation of the crate.
6
7 //! This module parses eBPF assembly language source code.
8
9 use alloc::{
10 string::{String, ToString},
11 vec::Vec,
12 };
13
14 #[cfg(feature = "std")]
15 use combine::EasyParser;
16 use combine::{
17 attempt, between, eof, many, many1, one_of, optional,
18 parser::char::{alpha_num, char, digit, hex_digit, spaces, string},
19 sep_by,
20 stream::position::{self},
21 ParseError, Parser, Stream,
22 };
23
/// A single operand attached to a parsed instruction.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum Operand {
    /// A register, identified by its number (`r3` is parsed as `Register(3)`).
    Register(i64),
    /// An integer literal, serving either as an immediate value or as a jump offset.
    Integer(i64),
    /// A memory reference: the register number first, then the signed offset.
    Memory(i64, i64),
    /// Placeholder variant, used for pattern matching.
    Nil,
}
36
37 /// Parsed instruction.
38 #[derive(Debug, PartialEq, Eq)]
39 pub struct Instruction {
40 /// Instruction name.
41 pub name: String,
42 /// Operands.
43 pub operands: Vec<Operand>,
44 }
45
ident<I>() -> impl Parser<I, Output = String> where I: Stream<Token = char>, I::Error: ParseError<I::Token, I::Range, I::Position>,46 fn ident<I>() -> impl Parser<I, Output = String>
47 where
48 I: Stream<Token = char>,
49 I::Error: ParseError<I::Token, I::Range, I::Position>,
50 {
51 many1(alpha_num())
52 }
53
integer<I>() -> impl Parser<I, Output = i64> where I: Stream<Token = char>, I::Error: ParseError<I::Token, I::Range, I::Position>,54 fn integer<I>() -> impl Parser<I, Output = i64>
55 where
56 I: Stream<Token = char>,
57 I::Error: ParseError<I::Token, I::Range, I::Position>,
58 {
59 let sign = optional(one_of("-+".chars())).map(|x| match x {
60 Some('-') => -1,
61 _ => 1,
62 });
63 let hex = string("0x")
64 .with(many1(hex_digit()))
65 .map(|x: String| u64::from_str_radix(&x, 16).unwrap() as i64);
66 let dec = many1(digit()).map(|x: String| x.parse::<i64>().unwrap());
67 (sign, attempt(hex).or(dec)).map(|(s, x)| s * x)
68 }
69
register<I>() -> impl Parser<I, Output = i64> where I: Stream<Token = char>, I::Error: ParseError<I::Token, I::Range, I::Position>,70 fn register<I>() -> impl Parser<I, Output = i64>
71 where
72 I: Stream<Token = char>,
73 I::Error: ParseError<I::Token, I::Range, I::Position>,
74 {
75 char('r')
76 .with(many1(digit()))
77 .map(|x: String| x.parse::<i64>().unwrap())
78 }
79
operand<I>() -> impl Parser<I, Output = Operand> where I: Stream<Token = char>, I::Error: ParseError<I::Token, I::Range, I::Position>,80 fn operand<I>() -> impl Parser<I, Output = Operand>
81 where
82 I: Stream<Token = char>,
83 I::Error: ParseError<I::Token, I::Range, I::Position>,
84 {
85 let register_operand = register().map(Operand::Register);
86 let immediate = integer().map(Operand::Integer);
87 let memory = between(char('['), char(']'), (register(), optional(integer())))
88 .map(|t| Operand::Memory(t.0, t.1.unwrap_or(0)));
89 register_operand.or(immediate).or(memory)
90 }
91
instruction<I>() -> impl Parser<I, Output = Instruction> where I: Stream<Token = char>, I::Error: ParseError<I::Token, I::Range, I::Position>,92 fn instruction<I>() -> impl Parser<I, Output = Instruction>
93 where
94 I: Stream<Token = char>,
95 I::Error: ParseError<I::Token, I::Range, I::Position>,
96 {
97 let operands = sep_by(operand(), char(',').skip(spaces()));
98 (ident().skip(spaces()), operands, spaces()).map(|t| Instruction {
99 name: t.0,
100 operands: t.1,
101 })
102 }
103
104 /// Parse a string into a list of instructions.
105 ///
106 /// The instructions are not validated and may have invalid names and operand types.
parse(input: &str) -> Result<Vec<Instruction>, String>107 pub fn parse(input: &str) -> Result<Vec<Instruction>, String> {
108 let mut with = spaces().with(many(instruction()).skip(eof()));
109
110 #[cfg(feature = "std")]
111 {
112 match with.easy_parse(position::Stream::new(input)) {
113 Ok((insts, _)) => Ok(insts),
114 Err(err) => Err(err.to_string()),
115 }
116 }
117 #[cfg(not(feature = "std"))]
118 {
119 match with.parse(position::Stream::new(input)) {
120 Ok((insts, _)) => Ok(insts),
121 Err(err) => Err(err.to_string()),
122 }
123 }
124 }
125
126 #[cfg(test)]
127 mod tests {
128 use alloc::{string::ToString, vec};
129
130 use combine::Parser;
131
132 use super::{ident, instruction, integer, operand, parse, register, Instruction, Operand};
133
134 // Unit tests for the different kinds of parsers.
135
#[test]
fn test_ident() {
    // (input, expected identifier, expected unparsed remainder)
    let cases = [
        ("nop", "nop", ""),
        ("add32", "add32", ""),
        ("add32*", "add32", "*"),
    ];
    for &(input, name, rest) in &cases {
        assert_eq!(ident().parse(input), Ok((name.to_string(), rest)));
    }
}
142
#[test]
fn test_integer() {
    // (input, expected value); every input must be consumed entirely.
    let cases: [(&str, i64); 7] = [
        ("0", 0),
        ("42", 42),
        ("+42", 42),
        ("-42", -42),
        ("0x0", 0),
        ("0x123456789abcdef0", 0x123456789abcdef0),
        ("-0x1f", -31),
    ];
    for &(input, value) in &cases {
        assert_eq!(integer().parse(input), Ok((value, "")));
    }
}
156
#[test]
fn test_register() {
    // (input, expected register number)
    let cases: [(&str, i64); 2] = [("r0", 0), ("r15", 15)];
    for &(input, number) in &cases {
        assert_eq!(register().parse(input), Ok((number, "")));
    }
}
162
#[test]
fn test_operand() {
    // (input, expected operand); every input must be consumed entirely.
    let cases = [
        ("r0", Operand::Register(0)),
        ("r15", Operand::Register(15)),
        ("0", Operand::Integer(0)),
        ("42", Operand::Integer(42)),
        ("[r1]", Operand::Memory(1, 0)),
        ("[r3+5]", Operand::Memory(3, 5)),
        ("[r3+0x1f]", Operand::Memory(3, 31)),
        ("[r3-0x1f]", Operand::Memory(3, -31)),
    ];
    for &(input, expected) in &cases {
        assert_eq!(operand().parse(input), Ok((expected, "")));
    }
}
180
#[test]
fn test_instruction() {
    // (input, expected name, expected operands); every input must be consumed entirely.
    let cases = vec![
        ("exit", "exit", vec![]),
        ("call 2", "call", vec![Operand::Integer(2)]),
        (
            "addi r1, 2",
            "addi",
            vec![Operand::Register(1), Operand::Integer(2)],
        ),
        (
            "ldxb r2, [r1+12]",
            "ldxb",
            vec![Operand::Register(2), Operand::Memory(1, 12)],
        ),
        (
            "lsh r3, 0x8",
            "lsh",
            vec![Operand::Register(3), Operand::Integer(8)],
        ),
        (
            "jne r3, 0x8, +37",
            "jne",
            vec![
                Operand::Register(3),
                Operand::Integer(8),
                Operand::Integer(37),
            ],
        ),
        // Whitespace between operands is optional.
        (
            "jne r3,0x8,+37",
            "jne",
            vec![
                Operand::Register(3),
                Operand::Integer(8),
                Operand::Integer(37),
            ],
        ),
    ];

    for (input, name, operands) in cases {
        let expected = Instruction {
            name: name.to_string(),
            operands,
        };
        assert_eq!(instruction().parse(input), Ok((expected, "")));
    }
}
269
// Other unit tests: try to parse various sets of instructions.
271
#[test]
fn test_empty() {
    // An empty source yields an empty program rather than an error.
    let expected: Vec<Instruction> = vec![];
    assert_eq!(parse(""), Ok(expected));
}
276
#[test]
fn test_exit() {
    // No operands.
    let expected = vec![Instruction {
        name: "exit".to_string(),
        operands: vec![],
    }];
    assert_eq!(parse("exit"), Ok(expected));
}
288
#[test]
fn test_lsh() {
    // Register and immediate operands.
    let expected = vec![Instruction {
        name: "lsh".to_string(),
        operands: vec![Operand::Register(3), Operand::Integer(0x20)],
    }];
    assert_eq!(parse("lsh r3, 0x20"), Ok(expected));
}
300
#[test]
fn test_ja() {
    // Jump offset operand.
    let expected = vec![Instruction {
        name: "ja".to_string(),
        operands: vec![Operand::Integer(1)],
    }];
    assert_eq!(parse("ja +1"), Ok(expected));
}
312
#[test]
fn test_ldxh() {
    // Register and memory operands.
    let expected = vec![Instruction {
        name: "ldxh".to_string(),
        operands: vec![Operand::Register(4), Operand::Memory(1, 12)],
    }];
    assert_eq!(parse("ldxh r4, [r1+12]"), Ok(expected));
}
324
#[test]
fn test_tcp_sack() {
    // Sample program from ubpf.
    // The instructions are deliberately not indented: the parser does accept leading white
    // space, but a separate test covers that.
    let src = "\
ldxb r2, [r1+12]
ldxb r3, [r1+13]
lsh r3, 0x8
or r3, r2
mov r0, 0x0
jne r3, 0x8, +37
ldxb r2, [r1+23]
jne r2, 0x6, +35
ldxb r2, [r1+14]
add r1, 0xe
and r2, 0xf
lsh r2, 0x2
add r1, r2
mov r0, 0x0
ldxh r4, [r1+12]
add r1, 0x14
rsh r4, 0x2
and r4, 0x3c
mov r2, r4
add r2, 0xffffffec
mov r5, 0x15
mov r3, 0x0
jgt r5, r4, +20
mov r5, r3
lsh r5, 0x20
arsh r5, 0x20
mov r4, r1
add r4, r5
ldxb r5, [r4]
jeq r5, 0x1, +4
jeq r5, 0x0, +12
mov r6, r3
jeq r5, 0x5, +9
ja +2
add r3, 0x1
mov r6, r3
ldxb r3, [r4+1]
add r3, r6
lsh r3, 0x20
arsh r3, 0x20
jsgt r2, r3, -18
ja +1
mov r0, 0x1
exit
";

    // Short local constructors keep the expected program to one line per instruction.
    let reg = Operand::Register;
    let imm = Operand::Integer;
    let mem = Operand::Memory;
    let insn = |name: &str, operands: &[Operand]| Instruction {
        name: name.to_string(),
        operands: operands.to_vec(),
    };

    let expected = vec![
        insn("ldxb", &[reg(2), mem(1, 12)]),
        insn("ldxb", &[reg(3), mem(1, 13)]),
        insn("lsh", &[reg(3), imm(8)]),
        insn("or", &[reg(3), reg(2)]),
        insn("mov", &[reg(0), imm(0)]),
        insn("jne", &[reg(3), imm(8), imm(37)]),
        insn("ldxb", &[reg(2), mem(1, 23)]),
        insn("jne", &[reg(2), imm(6), imm(35)]),
        insn("ldxb", &[reg(2), mem(1, 14)]),
        insn("add", &[reg(1), imm(14)]),
        insn("and", &[reg(2), imm(15)]),
        insn("lsh", &[reg(2), imm(2)]),
        insn("add", &[reg(1), reg(2)]),
        insn("mov", &[reg(0), imm(0)]),
        insn("ldxh", &[reg(4), mem(1, 12)]),
        insn("add", &[reg(1), imm(20)]),
        insn("rsh", &[reg(4), imm(2)]),
        insn("and", &[reg(4), imm(60)]),
        insn("mov", &[reg(2), reg(4)]),
        insn("add", &[reg(2), imm(4294967276)]),
        insn("mov", &[reg(5), imm(21)]),
        insn("mov", &[reg(3), imm(0)]),
        insn("jgt", &[reg(5), reg(4), imm(20)]),
        insn("mov", &[reg(5), reg(3)]),
        insn("lsh", &[reg(5), imm(32)]),
        insn("arsh", &[reg(5), imm(32)]),
        insn("mov", &[reg(4), reg(1)]),
        insn("add", &[reg(4), reg(5)]),
        insn("ldxb", &[reg(5), mem(4, 0)]),
        insn("jeq", &[reg(5), imm(1), imm(4)]),
        insn("jeq", &[reg(5), imm(0), imm(12)]),
        insn("mov", &[reg(6), reg(3)]),
        insn("jeq", &[reg(5), imm(5), imm(9)]),
        insn("ja", &[imm(2)]),
        insn("add", &[reg(3), imm(1)]),
        insn("mov", &[reg(6), reg(3)]),
        insn("ldxb", &[reg(3), mem(4, 1)]),
        insn("add", &[reg(3), reg(6)]),
        insn("lsh", &[reg(3), imm(32)]),
        insn("arsh", &[reg(3), imm(32)]),
        insn("jsgt", &[reg(2), reg(3), imm(-18)]),
        insn("ja", &[imm(1)]),
        insn("mov", &[reg(0), imm(1)]),
        insn("exit", &[]),
    ];

    assert_eq!(parse(src), Ok(expected));
}
587
/// The expected message depends on the `std` feature: `combine`'s `EasyParser` is only usable
/// with `std`, and the plain `Parser` used otherwise produces a different error message.
#[test]
fn test_error_eof() {
    #[cfg(feature = "std")]
    let message = "Parse error at line: 1, column: 6\nUnexpected end of input\nExpected digit\n";
    #[cfg(not(feature = "std"))]
    let message = "unexpected parse";

    // Unexpected end of input in a register name.
    assert_eq!(parse("lsh r"), Err(message.to_string()));
}
608
/// The expected message depends on the `std` feature: `combine`'s `EasyParser` is only usable
/// with `std`, and the plain `Parser` used otherwise produces a different error message.
#[test]
fn test_error_unexpected_character() {
    #[cfg(feature = "std")]
    let message = "Parse error at line: 2, column: 1\nUnexpected `^`\nExpected letter or digit, whitespaces, `r`, `-`, `+`, `[` or end of input\n";
    #[cfg(not(feature = "std"))]
    let message = "unexpected parse";

    // Unexpected character at end of input.
    assert_eq!(parse("exit\n^"), Err(message.to_string()));
}
628
#[test]
fn test_initial_whitespace() {
    // White space (here a line break and indentation) before the first instruction is skipped.
    let program = "
                          exit";
    let expected = vec![Instruction {
        name: "exit".to_string(),
        operands: vec![],
    }];
    assert_eq!(parse(program), Ok(expected));
}
642 }
643