1 // SPDX-License-Identifier: GPL-2.0-only
2
3 #ifndef _NFT_SET_PIPAPO_H
4
5 #include <linux/log2.h>
6 #include <net/ipv6.h> /* For the maximum length of a field */
7
8 /* Count of concatenated fields depends on count of 32-bit nftables registers */
9 #define NFT_PIPAPO_MAX_FIELDS NFT_REG32_COUNT
10
11 /* Restrict usage to multiple fields, make sure rbtree is used otherwise */
12 #define NFT_PIPAPO_MIN_FIELDS 2
13
14 /* Largest supported field size */
15 #define NFT_PIPAPO_MAX_BYTES (sizeof(struct in6_addr))
16 #define NFT_PIPAPO_MAX_BITS (NFT_PIPAPO_MAX_BYTES * BITS_PER_BYTE)
17
18 /* Bits to be grouped together in table buckets depending on set size */
19 #define NFT_PIPAPO_GROUP_BITS_INIT NFT_PIPAPO_GROUP_BITS_SMALL_SET
20 #define NFT_PIPAPO_GROUP_BITS_SMALL_SET 8
21 #define NFT_PIPAPO_GROUP_BITS_LARGE_SET 4
22 #define NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4 \
23 BUILD_BUG_ON((NFT_PIPAPO_GROUP_BITS_SMALL_SET != 8) || \
24 (NFT_PIPAPO_GROUP_BITS_LARGE_SET != 4))
25 #define NFT_PIPAPO_GROUPS_PER_BYTE(f) (BITS_PER_BYTE / (f)->bb)
26
27 /* If a lookup table gets bigger than NFT_PIPAPO_LT_SIZE_HIGH, switch to the
28 * small group width, and switch to the big group width if the table gets
29 * smaller than NFT_PIPAPO_LT_SIZE_LOW.
30 *
31 * Picking 2MiB as threshold (for a single table) avoids as much as possible
32 * crossing page boundaries on most architectures (x86-64 and MIPS huge pages,
33 * ARMv7 supersections, POWER "large" pages, SPARC Level 1 regions, etc.), which
34 * keeps performance nice in case kvmalloc() gives us non-contiguous areas.
35 */
36 #define NFT_PIPAPO_LT_SIZE_THRESHOLD (1 << 21)
37 #define NFT_PIPAPO_LT_SIZE_HYSTERESIS (1 << 16)
38 #define NFT_PIPAPO_LT_SIZE_HIGH NFT_PIPAPO_LT_SIZE_THRESHOLD
39 #define NFT_PIPAPO_LT_SIZE_LOW NFT_PIPAPO_LT_SIZE_THRESHOLD - \
40 NFT_PIPAPO_LT_SIZE_HYSTERESIS
41
42 /* Fields are padded to 32 bits in input registers */
43 #define NFT_PIPAPO_GROUPS_PADDED_SIZE(f) \
44 (round_up((f)->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f), sizeof(u32)))
45 #define NFT_PIPAPO_GROUPS_PADDING(f) \
46 (NFT_PIPAPO_GROUPS_PADDED_SIZE(f) - (f)->groups / \
47 NFT_PIPAPO_GROUPS_PER_BYTE(f))
48
49 /* Number of buckets given by 2 ^ n, with n bucket bits */
50 #define NFT_PIPAPO_BUCKETS(bb) (1 << (bb))
51
52 /* Each n-bit range maps to up to n * 2 rules */
53 #define NFT_PIPAPO_MAP_NBITS (const_ilog2(NFT_PIPAPO_MAX_BITS * 2))
54
55 /* Use the rest of mapping table buckets for rule indices, but it makes no sense
56 * to exceed 32 bits
57 */
58 #if BITS_PER_LONG == 64
59 #define NFT_PIPAPO_MAP_TOBITS 32
60 #else
61 #define NFT_PIPAPO_MAP_TOBITS (BITS_PER_LONG - NFT_PIPAPO_MAP_NBITS)
62 #endif
63
64 /* ...which gives us the highest allowed index for a rule */
65 #define NFT_PIPAPO_RULE0_MAX ((1UL << (NFT_PIPAPO_MAP_TOBITS - 1)) \
66 - (1UL << NFT_PIPAPO_MAP_NBITS))
67
68 /* Definitions for vectorised implementations */
69 #ifdef NFT_PIPAPO_ALIGN
70 #define NFT_PIPAPO_ALIGN_HEADROOM \
71 (NFT_PIPAPO_ALIGN - ARCH_KMALLOC_MINALIGN)
72 #define NFT_PIPAPO_LT_ALIGN(lt) (PTR_ALIGN((lt), NFT_PIPAPO_ALIGN))
73 #define NFT_PIPAPO_LT_ASSIGN(field, x) \
74 do { \
75 (field)->lt_aligned = NFT_PIPAPO_LT_ALIGN(x); \
76 (field)->lt = (x); \
77 } while (0)
78 #else
79 #define NFT_PIPAPO_ALIGN_HEADROOM 0
80 #define NFT_PIPAPO_LT_ALIGN(lt) (lt)
81 #define NFT_PIPAPO_LT_ASSIGN(field, x) ((field)->lt = (x))
82 #endif /* NFT_PIPAPO_ALIGN */
83
84 #define nft_pipapo_for_each_field(field, index, match) \
85 for ((field) = (match)->f, (index) = 0; \
86 (index) < (match)->field_count; \
87 (index)++, (field)++)
88
89 /**
90 * union nft_pipapo_map_bucket - Bucket of mapping table
91 * @to: First rule number (in next field) this rule maps to
92 * @n: Number of rules (in next field) this rule maps to
93 * @e: If there's no next field, pointer to element this rule maps to
94 */
95 union nft_pipapo_map_bucket {
96 struct {
97 #if BITS_PER_LONG == 64
98 static_assert(NFT_PIPAPO_MAP_TOBITS <= 32);
99 u32 to;
100
101 static_assert(NFT_PIPAPO_MAP_NBITS <= 32);
102 u32 n;
103 #else
104 unsigned long to:NFT_PIPAPO_MAP_TOBITS;
105 unsigned long n:NFT_PIPAPO_MAP_NBITS;
106 #endif
107 };
108 struct nft_pipapo_elem *e;
109 };
110
111 /**
112 * struct nft_pipapo_field - Lookup, mapping tables and related data for a field
113 * @groups: Amount of bit groups
114 * @rules: Number of inserted rules
115 * @bsize: Size of each bucket in lookup table, in longs
116 * @bb: Number of bits grouped together in lookup table buckets
117 * @lt: Lookup table: 'groups' rows of buckets
118 * @lt_aligned: Version of @lt aligned to NFT_PIPAPO_ALIGN bytes
119 * @mt: Mapping table: one bucket per rule
120 */
121 struct nft_pipapo_field {
122 int groups;
123 unsigned long rules;
124 size_t bsize;
125 int bb;
126 #ifdef NFT_PIPAPO_ALIGN
127 unsigned long *lt_aligned;
128 #endif
129 unsigned long *lt;
130 union nft_pipapo_map_bucket *mt;
131 };
132
133 /**
134 * struct nft_pipapo_match - Data used for lookup and matching
135 * @field_count Amount of fields in set
136 * @scratch: Preallocated per-CPU maps for partial matching results
137 * @scratch_aligned: Version of @scratch aligned to NFT_PIPAPO_ALIGN bytes
138 * @bsize_max: Maximum lookup table bucket size of all fields, in longs
139 * @rcu Matching data is swapped on commits
140 * @f: Fields, with lookup and mapping tables
141 */
142 struct nft_pipapo_match {
143 int field_count;
144 #ifdef NFT_PIPAPO_ALIGN
145 unsigned long * __percpu *scratch_aligned;
146 #endif
147 unsigned long * __percpu *scratch;
148 size_t bsize_max;
149 struct rcu_head rcu;
150 struct nft_pipapo_field f[];
151 };
152
153 /**
154 * struct nft_pipapo - Representation of a set
155 * @match: Currently in-use matching data
156 * @clone: Copy where pending insertions and deletions are kept
157 * @width: Total bytes to be matched for one packet, including padding
158 * @dirty: Working copy has pending insertions or deletions
159 * @last_gc: Timestamp of last garbage collection run, jiffies
160 */
161 struct nft_pipapo {
162 struct nft_pipapo_match __rcu *match;
163 struct nft_pipapo_match *clone;
164 int width;
165 bool dirty;
166 unsigned long last_gc;
167 };
168
169 struct nft_pipapo_elem;
170
171 /**
172 * struct nft_pipapo_elem - API-facing representation of single set element
173 * @ext: nftables API extensions
174 */
175 struct nft_pipapo_elem {
176 struct nft_set_ext ext;
177 };
178
179 int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst,
180 union nft_pipapo_map_bucket *mt, bool match_only);
181
182 /**
183 * pipapo_and_field_buckets_4bit() - Intersect 4-bit buckets
184 * @f: Field including lookup table
185 * @dst: Area to store result
186 * @data: Input data selecting table buckets
187 */
pipapo_and_field_buckets_4bit(struct nft_pipapo_field * f,unsigned long * dst,const u8 * data)188 static inline void pipapo_and_field_buckets_4bit(struct nft_pipapo_field *f,
189 unsigned long *dst,
190 const u8 *data)
191 {
192 unsigned long *lt = NFT_PIPAPO_LT_ALIGN(f->lt);
193 int group;
194
195 for (group = 0; group < f->groups; group += BITS_PER_BYTE / 4, data++) {
196 u8 v;
197
198 v = *data >> 4;
199 __bitmap_and(dst, dst, lt + v * f->bsize,
200 f->bsize * BITS_PER_LONG);
201 lt += f->bsize * NFT_PIPAPO_BUCKETS(4);
202
203 v = *data & 0x0f;
204 __bitmap_and(dst, dst, lt + v * f->bsize,
205 f->bsize * BITS_PER_LONG);
206 lt += f->bsize * NFT_PIPAPO_BUCKETS(4);
207 }
208 }
209
210 /**
211 * pipapo_and_field_buckets_8bit() - Intersect 8-bit buckets
212 * @f: Field including lookup table
213 * @dst: Area to store result
214 * @data: Input data selecting table buckets
215 */
pipapo_and_field_buckets_8bit(struct nft_pipapo_field * f,unsigned long * dst,const u8 * data)216 static inline void pipapo_and_field_buckets_8bit(struct nft_pipapo_field *f,
217 unsigned long *dst,
218 const u8 *data)
219 {
220 unsigned long *lt = NFT_PIPAPO_LT_ALIGN(f->lt);
221 int group;
222
223 for (group = 0; group < f->groups; group++, data++) {
224 __bitmap_and(dst, dst, lt + *data * f->bsize,
225 f->bsize * BITS_PER_LONG);
226 lt += f->bsize * NFT_PIPAPO_BUCKETS(8);
227 }
228 }
229
230 /**
231 * pipapo_estimate_size() - Estimate worst-case for set size
232 * @desc: Set description, element count and field description used here
233 *
234 * The size for this set type can vary dramatically, as it depends on the number
235 * of rules (composing netmasks) the entries expand to. We compute the worst
236 * case here.
237 *
238 * In general, for a non-ranged entry or a single composing netmask, we need
239 * one bit in each of the sixteen NFT_PIPAPO_BUCKETS, for each 4-bit group (that
240 * is, each input bit needs four bits of matching data), plus a bucket in the
241 * mapping table for each field.
242 *
243 * Return: worst-case set size in bytes, 0 on any overflow
244 */
pipapo_estimate_size(const struct nft_set_desc * desc)245 static u64 pipapo_estimate_size(const struct nft_set_desc *desc)
246 {
247 unsigned long entry_size;
248 u64 size;
249 int i;
250
251 for (i = 0, entry_size = 0; i < desc->field_count; i++) {
252 unsigned long rules;
253
254 if (desc->field_len[i] > NFT_PIPAPO_MAX_BYTES)
255 return 0;
256
257 /* Worst-case ranges for each concatenated field: each n-bit
258 * field can expand to up to n * 2 rules in each bucket, and
259 * each rule also needs a mapping bucket.
260 */
261 rules = ilog2(desc->field_len[i] * BITS_PER_BYTE) * 2;
262 entry_size += rules *
263 NFT_PIPAPO_BUCKETS(NFT_PIPAPO_GROUP_BITS_INIT) /
264 BITS_PER_BYTE;
265 entry_size += rules * sizeof(union nft_pipapo_map_bucket);
266 }
267
268 /* Rules in lookup and mapping tables are needed for each entry */
269 size = desc->size * entry_size;
270 if (size && div_u64(size, desc->size) != entry_size)
271 return 0;
272
273 size += sizeof(struct nft_pipapo) + sizeof(struct nft_pipapo_match) * 2;
274
275 size += sizeof(struct nft_pipapo_field) * desc->field_count;
276
277 return size;
278 }
279
280 #endif /* _NFT_SET_PIPAPO_H */
281