1*fae6e9adSlinfeng // SPDX-License-Identifier: (Apache-2.0 OR MIT)
2*fae6e9adSlinfeng // Copyright 2017 Rich Lane <lanerl@gmail.com>
3*fae6e9adSlinfeng
4*fae6e9adSlinfeng //! This module translates eBPF assembly language to binary.
5*fae6e9adSlinfeng
6*fae6e9adSlinfeng use alloc::{
7*fae6e9adSlinfeng collections::BTreeMap,
8*fae6e9adSlinfeng format,
9*fae6e9adSlinfeng string::{String, ToString},
10*fae6e9adSlinfeng vec,
11*fae6e9adSlinfeng vec::Vec,
12*fae6e9adSlinfeng };
13*fae6e9adSlinfeng
14*fae6e9adSlinfeng use self::InstructionType::{
15*fae6e9adSlinfeng AluBinary, AluUnary, Call, Endian, JumpConditional, JumpUnconditional, LoadAbs, LoadImm,
16*fae6e9adSlinfeng LoadInd, LoadReg, NoOperand, StoreImm, StoreReg,
17*fae6e9adSlinfeng };
18*fae6e9adSlinfeng use crate::{
19*fae6e9adSlinfeng asm_parser::{
20*fae6e9adSlinfeng parse, Instruction, Operand,
21*fae6e9adSlinfeng Operand::{Integer, Memory, Nil, Register},
22*fae6e9adSlinfeng },
23*fae6e9adSlinfeng ebpf::{self, Insn},
24*fae6e9adSlinfeng };
25*fae6e9adSlinfeng
/// Operand shape that a mnemonic expects.
///
/// The assembler looks a mnemonic up to one of these kinds and then uses the
/// kind to decide which operand combinations are valid and which fields of
/// the encoded instruction they populate.
#[derive(Debug, Clone, Copy, PartialEq)]
enum InstructionType {
    /// Destination register followed by a register or an immediate.
    AluBinary,
    /// A single destination register.
    AluUnary,
    /// Destination register followed by an immediate (`lddw`).
    LoadImm,
    /// A single immediate.
    LoadAbs,
    /// Source register followed by an immediate.
    LoadInd,
    /// Destination register followed by a memory operand.
    LoadReg,
    /// Memory operand followed by an immediate.
    StoreImm,
    /// Memory operand followed by a source register.
    StoreReg,
    /// A single jump offset.
    JumpUnconditional,
    /// Register, then a register or an immediate, then a jump offset.
    JumpConditional,
    /// A single immediate.
    Call,
    /// A single register; the payload (16, 32 or 64) is written to the
    /// immediate field of the encoded instruction.
    Endian(i64),
    /// No operands at all.
    NoOperand,
}
42*fae6e9adSlinfeng
make_instruction_map() -> BTreeMap<String, (InstructionType, u8)>43*fae6e9adSlinfeng fn make_instruction_map() -> BTreeMap<String, (InstructionType, u8)> {
44*fae6e9adSlinfeng let mut result = BTreeMap::new();
45*fae6e9adSlinfeng
46*fae6e9adSlinfeng let alu_binary_ops = [
47*fae6e9adSlinfeng ("add", ebpf::BPF_ADD),
48*fae6e9adSlinfeng ("sub", ebpf::BPF_SUB),
49*fae6e9adSlinfeng ("mul", ebpf::BPF_MUL),
50*fae6e9adSlinfeng ("div", ebpf::BPF_DIV),
51*fae6e9adSlinfeng ("or", ebpf::BPF_OR),
52*fae6e9adSlinfeng ("and", ebpf::BPF_AND),
53*fae6e9adSlinfeng ("lsh", ebpf::BPF_LSH),
54*fae6e9adSlinfeng ("rsh", ebpf::BPF_RSH),
55*fae6e9adSlinfeng ("mod", ebpf::BPF_MOD),
56*fae6e9adSlinfeng ("xor", ebpf::BPF_XOR),
57*fae6e9adSlinfeng ("mov", ebpf::BPF_MOV),
58*fae6e9adSlinfeng ("arsh", ebpf::BPF_ARSH),
59*fae6e9adSlinfeng ];
60*fae6e9adSlinfeng
61*fae6e9adSlinfeng let mem_sizes = [
62*fae6e9adSlinfeng ("w", ebpf::BPF_W),
63*fae6e9adSlinfeng ("h", ebpf::BPF_H),
64*fae6e9adSlinfeng ("b", ebpf::BPF_B),
65*fae6e9adSlinfeng ("dw", ebpf::BPF_DW),
66*fae6e9adSlinfeng ];
67*fae6e9adSlinfeng
68*fae6e9adSlinfeng let jump_conditions = [
69*fae6e9adSlinfeng ("jeq", ebpf::BPF_JEQ),
70*fae6e9adSlinfeng ("jgt", ebpf::BPF_JGT),
71*fae6e9adSlinfeng ("jge", ebpf::BPF_JGE),
72*fae6e9adSlinfeng ("jlt", ebpf::BPF_JLT),
73*fae6e9adSlinfeng ("jle", ebpf::BPF_JLE),
74*fae6e9adSlinfeng ("jset", ebpf::BPF_JSET),
75*fae6e9adSlinfeng ("jne", ebpf::BPF_JNE),
76*fae6e9adSlinfeng ("jsgt", ebpf::BPF_JSGT),
77*fae6e9adSlinfeng ("jsge", ebpf::BPF_JSGE),
78*fae6e9adSlinfeng ("jslt", ebpf::BPF_JSLT),
79*fae6e9adSlinfeng ("jsle", ebpf::BPF_JSLE),
80*fae6e9adSlinfeng ];
81*fae6e9adSlinfeng
82*fae6e9adSlinfeng {
83*fae6e9adSlinfeng let mut entry = |name: &str, inst_type: InstructionType, opc: u8| {
84*fae6e9adSlinfeng result.insert(name.to_string(), (inst_type, opc))
85*fae6e9adSlinfeng };
86*fae6e9adSlinfeng
87*fae6e9adSlinfeng // Miscellaneous.
88*fae6e9adSlinfeng entry("exit", NoOperand, ebpf::EXIT);
89*fae6e9adSlinfeng entry("ja", JumpUnconditional, ebpf::JA);
90*fae6e9adSlinfeng entry("call", Call, ebpf::CALL);
91*fae6e9adSlinfeng entry("lddw", LoadImm, ebpf::LD_DW_IMM);
92*fae6e9adSlinfeng
93*fae6e9adSlinfeng // AluUnary.
94*fae6e9adSlinfeng entry("neg", AluUnary, ebpf::NEG64);
95*fae6e9adSlinfeng entry("neg32", AluUnary, ebpf::NEG32);
96*fae6e9adSlinfeng entry("neg64", AluUnary, ebpf::NEG64);
97*fae6e9adSlinfeng
98*fae6e9adSlinfeng // AluBinary.
99*fae6e9adSlinfeng for &(name, opc) in &alu_binary_ops {
100*fae6e9adSlinfeng entry(name, AluBinary, ebpf::BPF_ALU64 | opc);
101*fae6e9adSlinfeng entry(&format!("{name}32"), AluBinary, ebpf::BPF_ALU | opc);
102*fae6e9adSlinfeng entry(&format!("{name}64"), AluBinary, ebpf::BPF_ALU64 | opc);
103*fae6e9adSlinfeng }
104*fae6e9adSlinfeng
105*fae6e9adSlinfeng // LoadAbs, LoadInd, LoadReg, StoreImm, and StoreReg.
106*fae6e9adSlinfeng for &(suffix, size) in &mem_sizes {
107*fae6e9adSlinfeng entry(
108*fae6e9adSlinfeng &format!("ldabs{suffix}"),
109*fae6e9adSlinfeng LoadAbs,
110*fae6e9adSlinfeng ebpf::BPF_ABS | ebpf::BPF_LD | size,
111*fae6e9adSlinfeng );
112*fae6e9adSlinfeng entry(
113*fae6e9adSlinfeng &format!("ldind{suffix}"),
114*fae6e9adSlinfeng LoadInd,
115*fae6e9adSlinfeng ebpf::BPF_IND | ebpf::BPF_LD | size,
116*fae6e9adSlinfeng );
117*fae6e9adSlinfeng entry(
118*fae6e9adSlinfeng &format!("ldx{suffix}"),
119*fae6e9adSlinfeng LoadReg,
120*fae6e9adSlinfeng ebpf::BPF_MEM | ebpf::BPF_LDX | size,
121*fae6e9adSlinfeng );
122*fae6e9adSlinfeng entry(
123*fae6e9adSlinfeng &format!("st{suffix}"),
124*fae6e9adSlinfeng StoreImm,
125*fae6e9adSlinfeng ebpf::BPF_MEM | ebpf::BPF_ST | size,
126*fae6e9adSlinfeng );
127*fae6e9adSlinfeng entry(
128*fae6e9adSlinfeng &format!("stx{suffix}"),
129*fae6e9adSlinfeng StoreReg,
130*fae6e9adSlinfeng ebpf::BPF_MEM | ebpf::BPF_STX | size,
131*fae6e9adSlinfeng );
132*fae6e9adSlinfeng }
133*fae6e9adSlinfeng
134*fae6e9adSlinfeng // JumpConditional.
135*fae6e9adSlinfeng for &(name, condition) in &jump_conditions {
136*fae6e9adSlinfeng entry(name, JumpConditional, ebpf::BPF_JMP | condition);
137*fae6e9adSlinfeng entry(
138*fae6e9adSlinfeng &format!("{name}32"),
139*fae6e9adSlinfeng JumpConditional,
140*fae6e9adSlinfeng ebpf::BPF_JMP32 | condition,
141*fae6e9adSlinfeng );
142*fae6e9adSlinfeng }
143*fae6e9adSlinfeng
144*fae6e9adSlinfeng // Endian.
145*fae6e9adSlinfeng for &size in &[16, 32, 64] {
146*fae6e9adSlinfeng entry(&format!("be{size}"), Endian(size), ebpf::BE);
147*fae6e9adSlinfeng entry(&format!("le{size}"), Endian(size), ebpf::LE);
148*fae6e9adSlinfeng }
149*fae6e9adSlinfeng }
150*fae6e9adSlinfeng
151*fae6e9adSlinfeng result
152*fae6e9adSlinfeng }
153*fae6e9adSlinfeng
insn(opc: u8, dst: i64, src: i64, off: i64, imm: i64) -> Result<Insn, String>154*fae6e9adSlinfeng fn insn(opc: u8, dst: i64, src: i64, off: i64, imm: i64) -> Result<Insn, String> {
155*fae6e9adSlinfeng if !(0..16).contains(&dst) {
156*fae6e9adSlinfeng return Err(format!("Invalid destination register {dst}"));
157*fae6e9adSlinfeng }
158*fae6e9adSlinfeng if dst < 0 || src >= 16 {
159*fae6e9adSlinfeng return Err(format!("Invalid source register {src}"));
160*fae6e9adSlinfeng }
161*fae6e9adSlinfeng if !(-32768..32768).contains(&off) {
162*fae6e9adSlinfeng return Err(format!("Invalid offset {off}"));
163*fae6e9adSlinfeng }
164*fae6e9adSlinfeng if !(-2147483648..2147483648).contains(&imm) {
165*fae6e9adSlinfeng return Err(format!("Invalid immediate {imm}"));
166*fae6e9adSlinfeng }
167*fae6e9adSlinfeng Ok(Insn {
168*fae6e9adSlinfeng opc,
169*fae6e9adSlinfeng dst: dst as u8,
170*fae6e9adSlinfeng src: src as u8,
171*fae6e9adSlinfeng off: off as i16,
172*fae6e9adSlinfeng imm: imm as i32,
173*fae6e9adSlinfeng })
174*fae6e9adSlinfeng }
175*fae6e9adSlinfeng
176*fae6e9adSlinfeng // TODO Use slice patterns when available and remove this function.
operands_tuple(operands: &[Operand]) -> Result<(Operand, Operand, Operand), String>177*fae6e9adSlinfeng fn operands_tuple(operands: &[Operand]) -> Result<(Operand, Operand, Operand), String> {
178*fae6e9adSlinfeng match operands.len() {
179*fae6e9adSlinfeng 0 => Ok((Nil, Nil, Nil)),
180*fae6e9adSlinfeng 1 => Ok((operands[0], Nil, Nil)),
181*fae6e9adSlinfeng 2 => Ok((operands[0], operands[1], Nil)),
182*fae6e9adSlinfeng 3 => Ok((operands[0], operands[1], operands[2])),
183*fae6e9adSlinfeng _ => Err("Too many operands".to_string()),
184*fae6e9adSlinfeng }
185*fae6e9adSlinfeng }
186*fae6e9adSlinfeng
encode(inst_type: InstructionType, opc: u8, operands: &[Operand]) -> Result<Insn, String>187*fae6e9adSlinfeng fn encode(inst_type: InstructionType, opc: u8, operands: &[Operand]) -> Result<Insn, String> {
188*fae6e9adSlinfeng let (a, b, c) = (operands_tuple(operands))?;
189*fae6e9adSlinfeng match (inst_type, a, b, c) {
190*fae6e9adSlinfeng (AluBinary, Register(dst), Register(src), Nil) => insn(opc | ebpf::BPF_X, dst, src, 0, 0),
191*fae6e9adSlinfeng (AluBinary, Register(dst), Integer(imm), Nil) => insn(opc | ebpf::BPF_K, dst, 0, 0, imm),
192*fae6e9adSlinfeng (AluUnary, Register(dst), Nil, Nil) => insn(opc, dst, 0, 0, 0),
193*fae6e9adSlinfeng (LoadAbs, Integer(imm), Nil, Nil) => insn(opc, 0, 0, 0, imm),
194*fae6e9adSlinfeng (LoadInd, Register(src), Integer(imm), Nil) => insn(opc, 0, src, 0, imm),
195*fae6e9adSlinfeng (LoadReg, Register(dst), Memory(src, off), Nil)
196*fae6e9adSlinfeng | (StoreReg, Memory(dst, off), Register(src), Nil) => insn(opc, dst, src, off, 0),
197*fae6e9adSlinfeng (StoreImm, Memory(dst, off), Integer(imm), Nil) => insn(opc, dst, 0, off, imm),
198*fae6e9adSlinfeng (NoOperand, Nil, Nil, Nil) => insn(opc, 0, 0, 0, 0),
199*fae6e9adSlinfeng (JumpUnconditional, Integer(off), Nil, Nil) => insn(opc, 0, 0, off, 0),
200*fae6e9adSlinfeng (JumpConditional, Register(dst), Register(src), Integer(off)) => {
201*fae6e9adSlinfeng insn(opc | ebpf::BPF_X, dst, src, off, 0)
202*fae6e9adSlinfeng }
203*fae6e9adSlinfeng (JumpConditional, Register(dst), Integer(imm), Integer(off)) => {
204*fae6e9adSlinfeng insn(opc | ebpf::BPF_K, dst, 0, off, imm)
205*fae6e9adSlinfeng }
206*fae6e9adSlinfeng (Call, Integer(imm), Nil, Nil) => insn(opc, 0, 0, 0, imm),
207*fae6e9adSlinfeng (Endian(size), Register(dst), Nil, Nil) => insn(opc, dst, 0, 0, size),
208*fae6e9adSlinfeng (LoadImm, Register(dst), Integer(imm), Nil) => insn(opc, dst, 0, 0, (imm << 32) >> 32),
209*fae6e9adSlinfeng _ => Err(format!("Unexpected operands: {operands:?}")),
210*fae6e9adSlinfeng }
211*fae6e9adSlinfeng }
212*fae6e9adSlinfeng
assemble_internal(parsed: &[Instruction]) -> Result<Vec<Insn>, String>213*fae6e9adSlinfeng fn assemble_internal(parsed: &[Instruction]) -> Result<Vec<Insn>, String> {
214*fae6e9adSlinfeng let instruction_map = make_instruction_map();
215*fae6e9adSlinfeng let mut result: Vec<Insn> = vec![];
216*fae6e9adSlinfeng for instruction in parsed {
217*fae6e9adSlinfeng let name = instruction.name.as_str();
218*fae6e9adSlinfeng match instruction_map.get(name) {
219*fae6e9adSlinfeng Some(&(inst_type, opc)) => {
220*fae6e9adSlinfeng match encode(inst_type, opc, &instruction.operands) {
221*fae6e9adSlinfeng Ok(insn) => result.push(insn),
222*fae6e9adSlinfeng Err(msg) => return Err(format!("Failed to encode {name}: {msg}")),
223*fae6e9adSlinfeng }
224*fae6e9adSlinfeng // Special case for lddw.
225*fae6e9adSlinfeng if let LoadImm = inst_type {
226*fae6e9adSlinfeng if let Integer(imm) = instruction.operands[1] {
227*fae6e9adSlinfeng result.push(insn(0, 0, 0, 0, imm >> 32).unwrap());
228*fae6e9adSlinfeng }
229*fae6e9adSlinfeng }
230*fae6e9adSlinfeng }
231*fae6e9adSlinfeng None => return Err(format!("Invalid instruction {name:?}")),
232*fae6e9adSlinfeng }
233*fae6e9adSlinfeng }
234*fae6e9adSlinfeng Ok(result)
235*fae6e9adSlinfeng }
236*fae6e9adSlinfeng
237*fae6e9adSlinfeng /// Parse assembly source and translate to binary.
238*fae6e9adSlinfeng ///
239*fae6e9adSlinfeng /// # Examples
240*fae6e9adSlinfeng ///
241*fae6e9adSlinfeng /// ```
242*fae6e9adSlinfeng /// use rbpf::assembler::assemble;
243*fae6e9adSlinfeng /// let prog = assemble("add64 r1, 0x605
244*fae6e9adSlinfeng /// mov64 r2, 0x32
245*fae6e9adSlinfeng /// mov64 r1, r0
246*fae6e9adSlinfeng /// be16 r0
247*fae6e9adSlinfeng /// neg64 r2
248*fae6e9adSlinfeng /// exit");
249*fae6e9adSlinfeng /// println!("{:?}", prog);
250*fae6e9adSlinfeng /// # assert_eq!(prog,
251*fae6e9adSlinfeng /// # Ok(vec![0x07, 0x01, 0x00, 0x00, 0x05, 0x06, 0x00, 0x00,
252*fae6e9adSlinfeng /// # 0xb7, 0x02, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00,
253*fae6e9adSlinfeng /// # 0xbf, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
254*fae6e9adSlinfeng /// # 0xdc, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
255*fae6e9adSlinfeng /// # 0x87, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
256*fae6e9adSlinfeng /// # 0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]));
257*fae6e9adSlinfeng /// ```
258*fae6e9adSlinfeng ///
259*fae6e9adSlinfeng /// This will produce the following output:
260*fae6e9adSlinfeng ///
261*fae6e9adSlinfeng /// ```test
262*fae6e9adSlinfeng /// Ok([0x07, 0x01, 0x00, 0x00, 0x05, 0x06, 0x00, 0x00,
263*fae6e9adSlinfeng /// 0xb7, 0x02, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00,
264*fae6e9adSlinfeng /// 0xbf, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
265*fae6e9adSlinfeng /// 0xdc, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
266*fae6e9adSlinfeng /// 0x87, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
267*fae6e9adSlinfeng /// 0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00])
268*fae6e9adSlinfeng /// ```
assemble(src: &str) -> Result<Vec<u8>, String>269*fae6e9adSlinfeng pub fn assemble(src: &str) -> Result<Vec<u8>, String> {
270*fae6e9adSlinfeng let parsed = (parse(src))?;
271*fae6e9adSlinfeng let insns = (assemble_internal(&parsed))?;
272*fae6e9adSlinfeng let mut result: Vec<u8> = vec![];
273*fae6e9adSlinfeng for insn in insns {
274*fae6e9adSlinfeng result.extend_from_slice(&insn.to_array());
275*fae6e9adSlinfeng }
276*fae6e9adSlinfeng Ok(result)
277*fae6e9adSlinfeng }
278