xref: /DragonOS/kernel/crates/rbpf/src/assembler.rs (revision fae6e9ade46a52976ad5d099643d51cc20876448)
1*fae6e9adSlinfeng // SPDX-License-Identifier: (Apache-2.0 OR MIT)
2*fae6e9adSlinfeng // Copyright 2017 Rich Lane <lanerl@gmail.com>
3*fae6e9adSlinfeng 
4*fae6e9adSlinfeng //! This module translates eBPF assembly language to binary.
5*fae6e9adSlinfeng 
6*fae6e9adSlinfeng use alloc::{
7*fae6e9adSlinfeng     collections::BTreeMap,
8*fae6e9adSlinfeng     format,
9*fae6e9adSlinfeng     string::{String, ToString},
10*fae6e9adSlinfeng     vec,
11*fae6e9adSlinfeng     vec::Vec,
12*fae6e9adSlinfeng };
13*fae6e9adSlinfeng 
14*fae6e9adSlinfeng use self::InstructionType::{
15*fae6e9adSlinfeng     AluBinary, AluUnary, Call, Endian, JumpConditional, JumpUnconditional, LoadAbs, LoadImm,
16*fae6e9adSlinfeng     LoadInd, LoadReg, NoOperand, StoreImm, StoreReg,
17*fae6e9adSlinfeng };
18*fae6e9adSlinfeng use crate::{
19*fae6e9adSlinfeng     asm_parser::{
20*fae6e9adSlinfeng         parse, Instruction, Operand,
21*fae6e9adSlinfeng         Operand::{Integer, Memory, Nil, Register},
22*fae6e9adSlinfeng     },
23*fae6e9adSlinfeng     ebpf::{self, Insn},
24*fae6e9adSlinfeng };
25*fae6e9adSlinfeng 
/// Operand layout expected by a mnemonic.
///
/// The assembler looks a mnemonic up to one of these variants and then checks
/// the parsed operands against the shape the variant requires.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum InstructionType {
    /// Two-operand ALU operation: `reg, reg` or `reg, imm`.
    AluBinary,
    /// One-operand ALU operation: `reg`.
    AluUnary,
    /// Load of an immediate into a register: `reg, imm` (used by `lddw`).
    LoadImm,
    /// Absolute load: `imm`.
    LoadAbs,
    /// Indirect load: `reg, imm`.
    LoadInd,
    /// Load from memory into a register: `reg, [reg+off]`.
    LoadReg,
    /// Store of an immediate to memory: `[reg+off], imm`.
    StoreImm,
    /// Store of a register to memory: `[reg+off], reg`.
    StoreReg,
    /// Unconditional jump: `off`.
    JumpUnconditional,
    /// Conditional jump: `reg, reg, off` or `reg, imm, off`.
    JumpConditional,
    /// Call: `imm`.
    Call,
    /// Byte-swap of a register; the payload is the width in bits and is
    /// emitted as the instruction's immediate.
    Endian(i64),
    /// Instruction that takes no operands (for example `exit`).
    NoOperand,
}
42*fae6e9adSlinfeng 
make_instruction_map() -> BTreeMap<String, (InstructionType, u8)>43*fae6e9adSlinfeng fn make_instruction_map() -> BTreeMap<String, (InstructionType, u8)> {
44*fae6e9adSlinfeng     let mut result = BTreeMap::new();
45*fae6e9adSlinfeng 
46*fae6e9adSlinfeng     let alu_binary_ops = [
47*fae6e9adSlinfeng         ("add", ebpf::BPF_ADD),
48*fae6e9adSlinfeng         ("sub", ebpf::BPF_SUB),
49*fae6e9adSlinfeng         ("mul", ebpf::BPF_MUL),
50*fae6e9adSlinfeng         ("div", ebpf::BPF_DIV),
51*fae6e9adSlinfeng         ("or", ebpf::BPF_OR),
52*fae6e9adSlinfeng         ("and", ebpf::BPF_AND),
53*fae6e9adSlinfeng         ("lsh", ebpf::BPF_LSH),
54*fae6e9adSlinfeng         ("rsh", ebpf::BPF_RSH),
55*fae6e9adSlinfeng         ("mod", ebpf::BPF_MOD),
56*fae6e9adSlinfeng         ("xor", ebpf::BPF_XOR),
57*fae6e9adSlinfeng         ("mov", ebpf::BPF_MOV),
58*fae6e9adSlinfeng         ("arsh", ebpf::BPF_ARSH),
59*fae6e9adSlinfeng     ];
60*fae6e9adSlinfeng 
61*fae6e9adSlinfeng     let mem_sizes = [
62*fae6e9adSlinfeng         ("w", ebpf::BPF_W),
63*fae6e9adSlinfeng         ("h", ebpf::BPF_H),
64*fae6e9adSlinfeng         ("b", ebpf::BPF_B),
65*fae6e9adSlinfeng         ("dw", ebpf::BPF_DW),
66*fae6e9adSlinfeng     ];
67*fae6e9adSlinfeng 
68*fae6e9adSlinfeng     let jump_conditions = [
69*fae6e9adSlinfeng         ("jeq", ebpf::BPF_JEQ),
70*fae6e9adSlinfeng         ("jgt", ebpf::BPF_JGT),
71*fae6e9adSlinfeng         ("jge", ebpf::BPF_JGE),
72*fae6e9adSlinfeng         ("jlt", ebpf::BPF_JLT),
73*fae6e9adSlinfeng         ("jle", ebpf::BPF_JLE),
74*fae6e9adSlinfeng         ("jset", ebpf::BPF_JSET),
75*fae6e9adSlinfeng         ("jne", ebpf::BPF_JNE),
76*fae6e9adSlinfeng         ("jsgt", ebpf::BPF_JSGT),
77*fae6e9adSlinfeng         ("jsge", ebpf::BPF_JSGE),
78*fae6e9adSlinfeng         ("jslt", ebpf::BPF_JSLT),
79*fae6e9adSlinfeng         ("jsle", ebpf::BPF_JSLE),
80*fae6e9adSlinfeng     ];
81*fae6e9adSlinfeng 
82*fae6e9adSlinfeng     {
83*fae6e9adSlinfeng         let mut entry = |name: &str, inst_type: InstructionType, opc: u8| {
84*fae6e9adSlinfeng             result.insert(name.to_string(), (inst_type, opc))
85*fae6e9adSlinfeng         };
86*fae6e9adSlinfeng 
87*fae6e9adSlinfeng         // Miscellaneous.
88*fae6e9adSlinfeng         entry("exit", NoOperand, ebpf::EXIT);
89*fae6e9adSlinfeng         entry("ja", JumpUnconditional, ebpf::JA);
90*fae6e9adSlinfeng         entry("call", Call, ebpf::CALL);
91*fae6e9adSlinfeng         entry("lddw", LoadImm, ebpf::LD_DW_IMM);
92*fae6e9adSlinfeng 
93*fae6e9adSlinfeng         // AluUnary.
94*fae6e9adSlinfeng         entry("neg", AluUnary, ebpf::NEG64);
95*fae6e9adSlinfeng         entry("neg32", AluUnary, ebpf::NEG32);
96*fae6e9adSlinfeng         entry("neg64", AluUnary, ebpf::NEG64);
97*fae6e9adSlinfeng 
98*fae6e9adSlinfeng         // AluBinary.
99*fae6e9adSlinfeng         for &(name, opc) in &alu_binary_ops {
100*fae6e9adSlinfeng             entry(name, AluBinary, ebpf::BPF_ALU64 | opc);
101*fae6e9adSlinfeng             entry(&format!("{name}32"), AluBinary, ebpf::BPF_ALU | opc);
102*fae6e9adSlinfeng             entry(&format!("{name}64"), AluBinary, ebpf::BPF_ALU64 | opc);
103*fae6e9adSlinfeng         }
104*fae6e9adSlinfeng 
105*fae6e9adSlinfeng         // LoadAbs, LoadInd, LoadReg, StoreImm, and StoreReg.
106*fae6e9adSlinfeng         for &(suffix, size) in &mem_sizes {
107*fae6e9adSlinfeng             entry(
108*fae6e9adSlinfeng                 &format!("ldabs{suffix}"),
109*fae6e9adSlinfeng                 LoadAbs,
110*fae6e9adSlinfeng                 ebpf::BPF_ABS | ebpf::BPF_LD | size,
111*fae6e9adSlinfeng             );
112*fae6e9adSlinfeng             entry(
113*fae6e9adSlinfeng                 &format!("ldind{suffix}"),
114*fae6e9adSlinfeng                 LoadInd,
115*fae6e9adSlinfeng                 ebpf::BPF_IND | ebpf::BPF_LD | size,
116*fae6e9adSlinfeng             );
117*fae6e9adSlinfeng             entry(
118*fae6e9adSlinfeng                 &format!("ldx{suffix}"),
119*fae6e9adSlinfeng                 LoadReg,
120*fae6e9adSlinfeng                 ebpf::BPF_MEM | ebpf::BPF_LDX | size,
121*fae6e9adSlinfeng             );
122*fae6e9adSlinfeng             entry(
123*fae6e9adSlinfeng                 &format!("st{suffix}"),
124*fae6e9adSlinfeng                 StoreImm,
125*fae6e9adSlinfeng                 ebpf::BPF_MEM | ebpf::BPF_ST | size,
126*fae6e9adSlinfeng             );
127*fae6e9adSlinfeng             entry(
128*fae6e9adSlinfeng                 &format!("stx{suffix}"),
129*fae6e9adSlinfeng                 StoreReg,
130*fae6e9adSlinfeng                 ebpf::BPF_MEM | ebpf::BPF_STX | size,
131*fae6e9adSlinfeng             );
132*fae6e9adSlinfeng         }
133*fae6e9adSlinfeng 
134*fae6e9adSlinfeng         // JumpConditional.
135*fae6e9adSlinfeng         for &(name, condition) in &jump_conditions {
136*fae6e9adSlinfeng             entry(name, JumpConditional, ebpf::BPF_JMP | condition);
137*fae6e9adSlinfeng             entry(
138*fae6e9adSlinfeng                 &format!("{name}32"),
139*fae6e9adSlinfeng                 JumpConditional,
140*fae6e9adSlinfeng                 ebpf::BPF_JMP32 | condition,
141*fae6e9adSlinfeng             );
142*fae6e9adSlinfeng         }
143*fae6e9adSlinfeng 
144*fae6e9adSlinfeng         // Endian.
145*fae6e9adSlinfeng         for &size in &[16, 32, 64] {
146*fae6e9adSlinfeng             entry(&format!("be{size}"), Endian(size), ebpf::BE);
147*fae6e9adSlinfeng             entry(&format!("le{size}"), Endian(size), ebpf::LE);
148*fae6e9adSlinfeng         }
149*fae6e9adSlinfeng     }
150*fae6e9adSlinfeng 
151*fae6e9adSlinfeng     result
152*fae6e9adSlinfeng }
153*fae6e9adSlinfeng 
insn(opc: u8, dst: i64, src: i64, off: i64, imm: i64) -> Result<Insn, String>154*fae6e9adSlinfeng fn insn(opc: u8, dst: i64, src: i64, off: i64, imm: i64) -> Result<Insn, String> {
155*fae6e9adSlinfeng     if !(0..16).contains(&dst) {
156*fae6e9adSlinfeng         return Err(format!("Invalid destination register {dst}"));
157*fae6e9adSlinfeng     }
158*fae6e9adSlinfeng     if dst < 0 || src >= 16 {
159*fae6e9adSlinfeng         return Err(format!("Invalid source register {src}"));
160*fae6e9adSlinfeng     }
161*fae6e9adSlinfeng     if !(-32768..32768).contains(&off) {
162*fae6e9adSlinfeng         return Err(format!("Invalid offset {off}"));
163*fae6e9adSlinfeng     }
164*fae6e9adSlinfeng     if !(-2147483648..2147483648).contains(&imm) {
165*fae6e9adSlinfeng         return Err(format!("Invalid immediate {imm}"));
166*fae6e9adSlinfeng     }
167*fae6e9adSlinfeng     Ok(Insn {
168*fae6e9adSlinfeng         opc,
169*fae6e9adSlinfeng         dst: dst as u8,
170*fae6e9adSlinfeng         src: src as u8,
171*fae6e9adSlinfeng         off: off as i16,
172*fae6e9adSlinfeng         imm: imm as i32,
173*fae6e9adSlinfeng     })
174*fae6e9adSlinfeng }
175*fae6e9adSlinfeng 
176*fae6e9adSlinfeng // TODO Use slice patterns when available and remove this function.
operands_tuple(operands: &[Operand]) -> Result<(Operand, Operand, Operand), String>177*fae6e9adSlinfeng fn operands_tuple(operands: &[Operand]) -> Result<(Operand, Operand, Operand), String> {
178*fae6e9adSlinfeng     match operands.len() {
179*fae6e9adSlinfeng         0 => Ok((Nil, Nil, Nil)),
180*fae6e9adSlinfeng         1 => Ok((operands[0], Nil, Nil)),
181*fae6e9adSlinfeng         2 => Ok((operands[0], operands[1], Nil)),
182*fae6e9adSlinfeng         3 => Ok((operands[0], operands[1], operands[2])),
183*fae6e9adSlinfeng         _ => Err("Too many operands".to_string()),
184*fae6e9adSlinfeng     }
185*fae6e9adSlinfeng }
186*fae6e9adSlinfeng 
encode(inst_type: InstructionType, opc: u8, operands: &[Operand]) -> Result<Insn, String>187*fae6e9adSlinfeng fn encode(inst_type: InstructionType, opc: u8, operands: &[Operand]) -> Result<Insn, String> {
188*fae6e9adSlinfeng     let (a, b, c) = (operands_tuple(operands))?;
189*fae6e9adSlinfeng     match (inst_type, a, b, c) {
190*fae6e9adSlinfeng         (AluBinary, Register(dst), Register(src), Nil) => insn(opc | ebpf::BPF_X, dst, src, 0, 0),
191*fae6e9adSlinfeng         (AluBinary, Register(dst), Integer(imm), Nil) => insn(opc | ebpf::BPF_K, dst, 0, 0, imm),
192*fae6e9adSlinfeng         (AluUnary, Register(dst), Nil, Nil) => insn(opc, dst, 0, 0, 0),
193*fae6e9adSlinfeng         (LoadAbs, Integer(imm), Nil, Nil) => insn(opc, 0, 0, 0, imm),
194*fae6e9adSlinfeng         (LoadInd, Register(src), Integer(imm), Nil) => insn(opc, 0, src, 0, imm),
195*fae6e9adSlinfeng         (LoadReg, Register(dst), Memory(src, off), Nil)
196*fae6e9adSlinfeng         | (StoreReg, Memory(dst, off), Register(src), Nil) => insn(opc, dst, src, off, 0),
197*fae6e9adSlinfeng         (StoreImm, Memory(dst, off), Integer(imm), Nil) => insn(opc, dst, 0, off, imm),
198*fae6e9adSlinfeng         (NoOperand, Nil, Nil, Nil) => insn(opc, 0, 0, 0, 0),
199*fae6e9adSlinfeng         (JumpUnconditional, Integer(off), Nil, Nil) => insn(opc, 0, 0, off, 0),
200*fae6e9adSlinfeng         (JumpConditional, Register(dst), Register(src), Integer(off)) => {
201*fae6e9adSlinfeng             insn(opc | ebpf::BPF_X, dst, src, off, 0)
202*fae6e9adSlinfeng         }
203*fae6e9adSlinfeng         (JumpConditional, Register(dst), Integer(imm), Integer(off)) => {
204*fae6e9adSlinfeng             insn(opc | ebpf::BPF_K, dst, 0, off, imm)
205*fae6e9adSlinfeng         }
206*fae6e9adSlinfeng         (Call, Integer(imm), Nil, Nil) => insn(opc, 0, 0, 0, imm),
207*fae6e9adSlinfeng         (Endian(size), Register(dst), Nil, Nil) => insn(opc, dst, 0, 0, size),
208*fae6e9adSlinfeng         (LoadImm, Register(dst), Integer(imm), Nil) => insn(opc, dst, 0, 0, (imm << 32) >> 32),
209*fae6e9adSlinfeng         _ => Err(format!("Unexpected operands: {operands:?}")),
210*fae6e9adSlinfeng     }
211*fae6e9adSlinfeng }
212*fae6e9adSlinfeng 
assemble_internal(parsed: &[Instruction]) -> Result<Vec<Insn>, String>213*fae6e9adSlinfeng fn assemble_internal(parsed: &[Instruction]) -> Result<Vec<Insn>, String> {
214*fae6e9adSlinfeng     let instruction_map = make_instruction_map();
215*fae6e9adSlinfeng     let mut result: Vec<Insn> = vec![];
216*fae6e9adSlinfeng     for instruction in parsed {
217*fae6e9adSlinfeng         let name = instruction.name.as_str();
218*fae6e9adSlinfeng         match instruction_map.get(name) {
219*fae6e9adSlinfeng             Some(&(inst_type, opc)) => {
220*fae6e9adSlinfeng                 match encode(inst_type, opc, &instruction.operands) {
221*fae6e9adSlinfeng                     Ok(insn) => result.push(insn),
222*fae6e9adSlinfeng                     Err(msg) => return Err(format!("Failed to encode {name}: {msg}")),
223*fae6e9adSlinfeng                 }
224*fae6e9adSlinfeng                 // Special case for lddw.
225*fae6e9adSlinfeng                 if let LoadImm = inst_type {
226*fae6e9adSlinfeng                     if let Integer(imm) = instruction.operands[1] {
227*fae6e9adSlinfeng                         result.push(insn(0, 0, 0, 0, imm >> 32).unwrap());
228*fae6e9adSlinfeng                     }
229*fae6e9adSlinfeng                 }
230*fae6e9adSlinfeng             }
231*fae6e9adSlinfeng             None => return Err(format!("Invalid instruction {name:?}")),
232*fae6e9adSlinfeng         }
233*fae6e9adSlinfeng     }
234*fae6e9adSlinfeng     Ok(result)
235*fae6e9adSlinfeng }
236*fae6e9adSlinfeng 
237*fae6e9adSlinfeng /// Parse assembly source and translate to binary.
238*fae6e9adSlinfeng ///
239*fae6e9adSlinfeng /// # Examples
240*fae6e9adSlinfeng ///
241*fae6e9adSlinfeng /// ```
242*fae6e9adSlinfeng /// use rbpf::assembler::assemble;
243*fae6e9adSlinfeng /// let prog = assemble("add64 r1, 0x605
244*fae6e9adSlinfeng ///                      mov64 r2, 0x32
245*fae6e9adSlinfeng ///                      mov64 r1, r0
246*fae6e9adSlinfeng ///                      be16 r0
247*fae6e9adSlinfeng ///                      neg64 r2
248*fae6e9adSlinfeng ///                      exit");
249*fae6e9adSlinfeng /// println!("{:?}", prog);
250*fae6e9adSlinfeng /// # assert_eq!(prog,
251*fae6e9adSlinfeng /// #            Ok(vec![0x07, 0x01, 0x00, 0x00, 0x05, 0x06, 0x00, 0x00,
252*fae6e9adSlinfeng /// #                    0xb7, 0x02, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00,
253*fae6e9adSlinfeng /// #                    0xbf, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
254*fae6e9adSlinfeng /// #                    0xdc, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
255*fae6e9adSlinfeng /// #                    0x87, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
256*fae6e9adSlinfeng /// #                    0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]));
257*fae6e9adSlinfeng /// ```
258*fae6e9adSlinfeng ///
259*fae6e9adSlinfeng /// This will produce the following output:
260*fae6e9adSlinfeng ///
261*fae6e9adSlinfeng /// ```test
262*fae6e9adSlinfeng /// Ok([0x07, 0x01, 0x00, 0x00, 0x05, 0x06, 0x00, 0x00,
263*fae6e9adSlinfeng ///     0xb7, 0x02, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00,
264*fae6e9adSlinfeng ///     0xbf, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
265*fae6e9adSlinfeng ///     0xdc, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
266*fae6e9adSlinfeng ///     0x87, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
267*fae6e9adSlinfeng ///     0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00])
268*fae6e9adSlinfeng /// ```
assemble(src: &str) -> Result<Vec<u8>, String>269*fae6e9adSlinfeng pub fn assemble(src: &str) -> Result<Vec<u8>, String> {
270*fae6e9adSlinfeng     let parsed = (parse(src))?;
271*fae6e9adSlinfeng     let insns = (assemble_internal(&parsed))?;
272*fae6e9adSlinfeng     let mut result: Vec<u8> = vec![];
273*fae6e9adSlinfeng     for insn in insns {
274*fae6e9adSlinfeng         result.extend_from_slice(&insn.to_array());
275*fae6e9adSlinfeng     }
276*fae6e9adSlinfeng     Ok(result)
277*fae6e9adSlinfeng }
278