/* bpf_jit.S: Packet/header access helper functions
 * for PPC64 BPF compiler.
 *
 * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */

#include <asm/ppc_asm.h>
#include "bpf_jit.h"

/*
 * All of these routines are called directly from generated code,
 * whose register usage is:
 *
 * r3		skb
 * r4,r5	A,X
 * r6		*** address parameter to helper ***
 * r7-r10	scratch
 * r14		skb->data
 * r15		skb headlen
 * r16-31	M[]
 */
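/*
 * Illustrative sketch only (the real call sequence is emitted by
 * bpf_jit_comp.c): a generated call site for a word load at constant
 * offset K is assumed to look roughly like
 *
 *	li	r_addr, K	(offset into r6)
 *	bl	sk_load_word	(or an indirect mtlr/blrl call)
 *	blt	<epilogue>	(helpers report failure via cr0 = LT)
 *
 * On failure the generated 'blt' reaches the epilogue and returns 0;
 * see bpf_error at the bottom of this file.
 */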

/*
 * To consider: These helpers are so small it could be better to just
 * generate them inline.  Inline code can do the simple headlen check
 * then branch directly to slow_path_XXX if required.  (In fact, could
 * load a spare GPR with the address of slow_path_generic and pass size
 * as an argument, making the call site a mtlr, li and bllr.)
 */
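/*
 * Purely hypothetical sketch of that inline call site (not implemented
 * here): with the address of slow_path_generic preloaded in a spare
 * GPR, each packet access could become something like
 *
 *	mtlr	rN		(rN holds &slow_path_generic)
 *	li	rM, SIZE	(pass the access size)
 *	bllr
 *
 * rN, rM and slow_path_generic are illustrative names only.
 */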
	.globl	sk_load_word
sk_load_word:
	cmpdi	r_addr, 0
	blt	bpf_slow_path_word_neg
	.globl	sk_load_word_positive_offset
sk_load_word_positive_offset:
	/* Are we accessing past headlen? */
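	/*
	 * That is: take the slow path unless the 4-byte word fits
	 * entirely within the linear header, roughly
	 * "if ((s64)(headlen - 4) < offset) goto bpf_slow_path_word;".
	 */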
	subi	r_scratch1, r_HL, 4
	cmpd	r_scratch1, r_addr
	blt	bpf_slow_path_word
	/* Nope, just hitting the header.  cr0 here is eq or gt! */
	lwzx	r_A, r_D, r_addr
	/* When big endian we don't need to byteswap. */
	blr	/* Return success, cr0 != LT */

	.globl	sk_load_half
sk_load_half:
	cmpdi	r_addr, 0
	blt	bpf_slow_path_half_neg
	.globl	sk_load_half_positive_offset
sk_load_half_positive_offset:
	subi	r_scratch1, r_HL, 2
	cmpd	r_scratch1, r_addr
	blt	bpf_slow_path_half
	lhzx	r_A, r_D, r_addr
	blr

	.globl	sk_load_byte
sk_load_byte:
	cmpdi	r_addr, 0
	blt	bpf_slow_path_byte_neg
	.globl	sk_load_byte_positive_offset
sk_load_byte_positive_offset:
	cmpd	r_HL, r_addr
	ble	bpf_slow_path_byte
	lbzx	r_A, r_D, r_addr
	blr

/*
 * BPF_S_LDX_B_MSH: ldxb  4*([offset]&0xf)
 * r_addr is the offset value
 */
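/*
 * The rlwinm below computes X = 4 * (byte & 0xf) in a single
 * instruction: rotate left by 2 (multiply by 4) and keep only bits
 * 26..29 (mask 0x3c), which clears everything except the low nibble
 * shifted up by two.
 */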
	.globl sk_load_byte_msh
sk_load_byte_msh:
	cmpdi	r_addr, 0
	blt	bpf_slow_path_byte_msh_neg
	.globl sk_load_byte_msh_positive_offset
sk_load_byte_msh_positive_offset:
	cmpd	r_HL, r_addr
	ble	bpf_slow_path_byte_msh
	lbzx	r_X, r_D, r_addr
	rlwinm	r_X, r_X, 2, 32-4-2, 31-2
	blr

/* Call out to skb_copy_bits:
 * We'll need to back up our volatile regs first; we have
 * local variable space at r1+(BPF_PPC_STACK_BASIC).
 * Allocate a new stack frame here to remain ABI-compliant in
 * stashing LR.
 */
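/*
 * For reference, skb_copy_bits() has the C prototype
 *	int skb_copy_bits(const struct sk_buff *skb, int offset,
 *			  void *to, int len);
 * and returns 0 on success.  The (BPF_PPC_STACKFRAME+48)(r1) slot used
 * below is the r3 doubleword of the parameter save area in the frame
 * above ours (offset 48 is where that area starts in the ppc64 ABI
 * frame layout).
 */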
#define bpf_slow_path_common(SIZE)				\
	mflr	r0;						\
	std	r0, 16(r1);					\
	/* R3 goes in parameter space of caller's frame */	\
	std	r_skb, (BPF_PPC_STACKFRAME+48)(r1);		\
	std	r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1);		\
	std	r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1);		\
	addi	r5, r1, BPF_PPC_STACK_BASIC+(2*8);		\
	stdu	r1, -BPF_PPC_SLOWPATH_FRAME(r1);		\
	/* R3 = r_skb, as passed */				\
	mr	r4, r_addr;					\
	li	r6, SIZE;					\
	bl	skb_copy_bits;					\
	/* R3 = 0 on success */					\
	addi	r1, r1, BPF_PPC_SLOWPATH_FRAME;			\
	ld	r0, 16(r1);					\
	ld	r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1);		\
	ld	r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1);		\
	mtlr	r0;						\
	cmpdi	r3, 0;						\
	blt	bpf_error;	/* cr0 = LT */			\
	ld	r_skb, (BPF_PPC_STACKFRAME+48)(r1);		\
	/* Great success! */

bpf_slow_path_word:
	bpf_slow_path_common(4)
	/* Data value is on stack, and cr0 != LT */
	lwz	r_A, BPF_PPC_STACK_BASIC+(2*8)(r1)
	blr

bpf_slow_path_half:
	bpf_slow_path_common(2)
	lhz	r_A, BPF_PPC_STACK_BASIC+(2*8)(r1)
	blr

bpf_slow_path_byte:
	bpf_slow_path_common(1)
	lbz	r_A, BPF_PPC_STACK_BASIC+(2*8)(r1)
	blr

bpf_slow_path_byte_msh:
	bpf_slow_path_common(1)
	lbz	r_X, BPF_PPC_STACK_BASIC+(2*8)(r1)
	rlwinm	r_X, r_X, 2, 32-4-2, 31-2
	blr

/* Call out to bpf_internal_load_pointer_neg_helper:
 * We'll need to back up our volatile regs first; we have
 * local variable space at r1+(BPF_PPC_STACK_BASIC).
 * Allocate a new stack frame here to remain ABI-compliant in
 * stashing LR.
 */
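/*
 * For reference, the helper's C prototype is
 *	void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb,
 *						   int k, unsigned int size);
 * It returns a pointer to the requested data, or NULL on failure.  On
 * success the macro below moves that pointer into r_addr, so the loads
 * that follow simply use 0(r_addr).
 */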
#define sk_negative_common(SIZE)				\
	mflr	r0;						\
	std	r0, 16(r1);					\
	/* R3 goes in parameter space of caller's frame */	\
	std	r_skb, (BPF_PPC_STACKFRAME+48)(r1);		\
	std	r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1);		\
	std	r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1);		\
	stdu	r1, -BPF_PPC_SLOWPATH_FRAME(r1);		\
	/* R3 = r_skb, as passed */				\
	mr	r4, r_addr;					\
	li	r5, SIZE;					\
	bl	bpf_internal_load_pointer_neg_helper;		\
	/* R3 != 0 on success */				\
	addi	r1, r1, BPF_PPC_SLOWPATH_FRAME;			\
	ld	r0, 16(r1);					\
	ld	r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1);		\
	ld	r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1);		\
	mtlr	r0;						\
	cmpldi	r3, 0;						\
	beq	bpf_error_slow;	/* cr0 = EQ */			\
	mr	r_addr, r3;					\
	ld	r_skb, (BPF_PPC_STACKFRAME+48)(r1);		\
	/* Great success! */
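/*
 * Each negative-offset entry point first rejects offsets below
 * SKF_LL_OFF: "lis r_scratch1, -32" loads -32 << 16 = -0x200000,
 * which is the value of SKF_LL_OFF, the lowest special negative
 * offset BPF defines.
 */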
bpf_slow_path_word_neg:
	lis     r_scratch1,-32	/* SKF_LL_OFF */
	cmpd	r_addr, r_scratch1	/* addr < SKF_* */
	blt	bpf_error	/* cr0 = LT */
	.globl	sk_load_word_negative_offset
sk_load_word_negative_offset:
	sk_negative_common(4)
	lwz	r_A, 0(r_addr)
	blr

bpf_slow_path_half_neg:
	lis     r_scratch1,-32	/* SKF_LL_OFF */
	cmpd	r_addr, r_scratch1	/* addr < SKF_* */
	blt	bpf_error	/* cr0 = LT */
	.globl	sk_load_half_negative_offset
sk_load_half_negative_offset:
	sk_negative_common(2)
	lhz	r_A, 0(r_addr)
	blr

bpf_slow_path_byte_neg:
	lis     r_scratch1,-32	/* SKF_LL_OFF */
	cmpd	r_addr, r_scratch1	/* addr < SKF_* */
	blt	bpf_error	/* cr0 = LT */
	.globl	sk_load_byte_negative_offset
sk_load_byte_negative_offset:
	sk_negative_common(1)
	lbz	r_A, 0(r_addr)
	blr

bpf_slow_path_byte_msh_neg:
	lis     r_scratch1,-32	/* SKF_LL_OFF */
	cmpd	r_addr, r_scratch1	/* addr < SKF_* */
	blt	bpf_error	/* cr0 = LT */
	.globl	sk_load_byte_msh_negative_offset
sk_load_byte_msh_negative_offset:
	sk_negative_common(1)
	lbz	r_X, 0(r_addr)
	rlwinm	r_X, r_X, 2, 32-4-2, 31-2
	blr
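/*
 * bpf_error_slow is reached from sk_negative_common with cr0 = EQ (the
 * failed "cmpldi r3, 0"), but generated code tests for LT, so it first
 * forces cr0 to LT before falling through to bpf_error.
 */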
bpf_error_slow:
	/* fabricate a cr0 = lt */
	li	r_scratch1, -1
	cmpdi	r_scratch1, 0
bpf_error:
	/* Entered with cr0 = lt */
	li	r3, 0
	/* Generated code will 'blt epilogue', returning 0. */
	blr