// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include "rxe.h"

#define RXE_POOL_TIMEOUT	(200)	/* in jiffies */
#define RXE_POOL_ALIGN		(16)

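/*
 * Overview: each rxe object type (PD, QP, CQ, ...) has its own pool.
 * A pool hands out indices from an xarray so objects can be looked up
 * by index under RCU, and tracks object lifetime with a kref plus a
 * completion that the destroy path waits on until the last reference
 * is dropped.
 */
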
static const struct rxe_type_info {
	const char *name;
	size_t size;
	size_t elem_offset;
	void (*cleanup)(struct rxe_pool_elem *elem);
	u32 min_index;
	u32 max_index;
	u32 max_elem;
} rxe_type_info[RXE_NUM_TYPES] = {
	[RXE_TYPE_UC] = {
		.name = "uc",
		.size = sizeof(struct rxe_ucontext),
		.elem_offset = offsetof(struct rxe_ucontext, elem),
		.min_index = 1,
		.max_index = RXE_MAX_UCONTEXT,
		.max_elem = RXE_MAX_UCONTEXT,
	},
	[RXE_TYPE_PD] = {
		.name = "pd",
		.size = sizeof(struct rxe_pd),
		.elem_offset = offsetof(struct rxe_pd, elem),
		.min_index = 1,
		.max_index = RXE_MAX_PD,
		.max_elem = RXE_MAX_PD,
	},
	[RXE_TYPE_AH] = {
		.name = "ah",
		.size = sizeof(struct rxe_ah),
		.elem_offset = offsetof(struct rxe_ah, elem),
		.min_index = RXE_MIN_AH_INDEX,
		.max_index = RXE_MAX_AH_INDEX,
		.max_elem = RXE_MAX_AH,
	},
	[RXE_TYPE_SRQ] = {
		.name = "srq",
		.size = sizeof(struct rxe_srq),
		.elem_offset = offsetof(struct rxe_srq, elem),
		.cleanup = rxe_srq_cleanup,
		.min_index = RXE_MIN_SRQ_INDEX,
		.max_index = RXE_MAX_SRQ_INDEX,
		.max_elem = RXE_MAX_SRQ,
	},
	[RXE_TYPE_QP] = {
		.name = "qp",
		.size = sizeof(struct rxe_qp),
		.elem_offset = offsetof(struct rxe_qp, elem),
		.cleanup = rxe_qp_cleanup,
		.min_index = RXE_MIN_QP_INDEX,
		.max_index = RXE_MAX_QP_INDEX,
		.max_elem = RXE_MAX_QP,
	},
	[RXE_TYPE_CQ] = {
		.name = "cq",
		.size = sizeof(struct rxe_cq),
		.elem_offset = offsetof(struct rxe_cq, elem),
		.cleanup = rxe_cq_cleanup,
		.min_index = 1,
		.max_index = RXE_MAX_CQ,
		.max_elem = RXE_MAX_CQ,
	},
	[RXE_TYPE_MR] = {
		.name = "mr",
		.size = sizeof(struct rxe_mr),
		.elem_offset = offsetof(struct rxe_mr, elem),
		.cleanup = rxe_mr_cleanup,
		.min_index = RXE_MIN_MR_INDEX,
		.max_index = RXE_MAX_MR_INDEX,
		.max_elem = RXE_MAX_MR,
	},
	[RXE_TYPE_MW] = {
		.name = "mw",
		.size = sizeof(struct rxe_mw),
		.elem_offset = offsetof(struct rxe_mw, elem),
		.cleanup = rxe_mw_cleanup,
		.min_index = RXE_MIN_MW_INDEX,
		.max_index = RXE_MAX_MW_INDEX,
		.max_elem = RXE_MAX_MW,
	},
};

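/**
 * rxe_pool_init() - initialize an object pool
 * @rxe: rxe device the pool belongs to
 * @pool: pool to initialize
 * @type: type of the objects the pool will hold
 *
 * Copies the per-type parameters from rxe_type_info into the pool and
 * sets up the xarray used for index allocation and lookup.
 */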
void rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool,
		   enum rxe_elem_type type)
{
	const struct rxe_type_info *info = &rxe_type_info[type];

	memset(pool, 0, sizeof(*pool));

	pool->rxe = rxe;
	pool->name = info->name;
	pool->type = type;
	pool->max_elem = info->max_elem;
	pool->elem_size = ALIGN(info->size, RXE_POOL_ALIGN);
	pool->elem_offset = info->elem_offset;
	pool->cleanup = info->cleanup;

	atomic_set(&pool->num_elem, 0);

	xa_init_flags(&pool->xa, XA_FLAGS_ALLOC);
	pool->limit.min = info->min_index;
	pool->limit.max = info->max_index;
}

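/*
 * A pool must be empty when it is torn down; any leftover entry in
 * the xarray means an object leaked, so warn loudly.
 */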
void rxe_pool_cleanup(struct rxe_pool *pool)
{
	WARN_ON(!xa_empty(&pool->xa));
}

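/**
 * rxe_alloc() - allocate a new pool object (MR only)
 * @pool: pool to allocate from
 *
 * Only MR objects are allocated by the driver itself; the remaining
 * types are embedded in structures allocated by the core and are
 * added to their pool with __rxe_add_to_pool() instead.
 *
 * Return: the new object on success, or NULL on failure.
 */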
void *rxe_alloc(struct rxe_pool *pool)
{
	struct rxe_pool_elem *elem;
	void *obj;
	int err;

	if (WARN_ON(pool->type != RXE_TYPE_MR))
		return NULL;

	if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
		goto err_cnt;

	obj = kzalloc(pool->elem_size, GFP_KERNEL);
	if (!obj)
		goto err_cnt;

	elem = (struct rxe_pool_elem *)((u8 *)obj + pool->elem_offset);

	elem->pool = pool;
	elem->obj = obj;
	kref_init(&elem->ref_cnt);
	init_completion(&elem->complete);

	/* allocate index in array but leave pointer as NULL so it
	 * can't be looked up until rxe_finalize() is called
	 */
	err = xa_alloc_cyclic(&pool->xa, &elem->index, NULL, pool->limit,
			      &pool->next, GFP_KERNEL);
	if (err < 0)
		goto err_free;

	return obj;

err_free:
	kfree(obj);
err_cnt:
	atomic_dec(&pool->num_elem);
	return NULL;
}

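/**
 * __rxe_add_to_pool() - add an externally allocated object to its pool
 * @pool: pool the object belongs to
 * @elem: pool element embedded in the object
 * @sleepable: true if the caller may sleep
 *
 * Return: 0 on success, or -EINVAL if the pool is full or the index
 * allocation fails.
 */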
int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem,
		      bool sleepable)
{
	int err;
	gfp_t gfp_flags;

	if (WARN_ON(pool->type == RXE_TYPE_MR))
		return -EINVAL;

	if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
		goto err_cnt;

	elem->pool = pool;
	elem->obj = (u8 *)elem - pool->elem_offset;
	kref_init(&elem->ref_cnt);
	init_completion(&elem->complete);

	/* AH objects are unique in that the create_ah verb
	 * can be called in atomic context. If the create_ah
	 * call is not sleepable use GFP_ATOMIC.
	 */
	gfp_flags = sleepable ? GFP_KERNEL : GFP_ATOMIC;

	if (sleepable)
		might_sleep();
	err = xa_alloc_cyclic(&pool->xa, &elem->index, NULL, pool->limit,
			      &pool->next, gfp_flags);
	if (err < 0)
		goto err_cnt;

	return 0;

err_cnt:
	atomic_dec(&pool->num_elem);
	return -EINVAL;
}

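/**
 * rxe_pool_get_index() - look up a pool object by index
 * @pool: pool to search
 * @index: index of the object
 *
 * Takes a reference on the object if it is found and still live.
 *
 * Return: the object, or NULL if the index is unused or the object's
 * reference count has already dropped to zero.
 */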
void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
{
	struct rxe_pool_elem *elem;
	struct xarray *xa = &pool->xa;
	void *obj;

	rcu_read_lock();
	elem = xa_load(xa, index);
	if (elem && kref_get_unless_zero(&elem->ref_cnt))
		obj = elem->obj;
	else
		obj = NULL;
	rcu_read_unlock();

	return obj;
}

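/*
 * A minimal (hypothetical) caller sketch, assuming the rxe_put()
 * wrapper from rxe_pool.h and the qp_pool field of struct rxe_dev:
 *
 *	struct rxe_qp *qp = rxe_pool_get_index(&rxe->qp_pool, qpn);
 *
 *	if (qp) {
 *		... use qp ...
 *		rxe_put(qp);	(drops the reference taken by the lookup)
 *	}
 */
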
/* kref release callback: wake up the cleanup path waiting in
 * __rxe_cleanup()
 */
static void rxe_elem_release(struct kref *kref)
{
	struct rxe_pool_elem *elem = container_of(kref, typeof(*elem), ref_cnt);

	complete(&elem->complete);
}

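/**
 * __rxe_cleanup() - tear down a pool object
 * @elem: pool element embedded in the object
 * @sleepable: true if the caller may sleep
 *
 * Removes the object's index from the xarray, drops the reference
 * taken at allocation time, and then waits (or, in atomic context,
 * spins with mdelay()) until every remaining reference has been
 * dropped before running the type-specific cleanup.
 *
 * Return: 0 on success, or -EINVAL if references were still
 * outstanding when the timeout expired.
 */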
int __rxe_cleanup(struct rxe_pool_elem *elem, bool sleepable)
{
	struct rxe_pool *pool = elem->pool;
	struct xarray *xa = &pool->xa;
	static int timeout = RXE_POOL_TIMEOUT;
	int ret, err = 0;
	void *xa_ret;

	if (sleepable)
		might_sleep();

	/* erase xarray entry to prevent looking up
	 * the pool elem from its index
	 */
	xa_ret = xa_erase(xa, elem->index);
	WARN_ON(xa_err(xa_ret));

	/* if this is the last call to rxe_put complete the
	 * object. It is safe to touch obj->elem after this since
	 * the object is freed below
	 */
	__rxe_put(elem);

	/* wait until all references to the object have been
	 * dropped before final object specific cleanup and
	 * return to rdma-core
	 */
	if (sleepable) {
		if (!completion_done(&elem->complete) && timeout) {
			ret = wait_for_completion_timeout(&elem->complete,
							  timeout);

			/* Shouldn't happen. There are still references to
			 * the object but, rather than deadlock, free the
			 * object or pass it back to rdma-core.
			 */
			if (WARN_ON(!ret))
				err = -EINVAL;
		}
	} else {
		unsigned long until = jiffies + timeout;

		/* AH objects are unique in that the destroy_ah verb
		 * can be called in atomic context. This delay
		 * replaces the wait_for_completion call above
		 * when the destroy_ah call is not sleepable.
		 */
		while (!completion_done(&elem->complete) &&
		       time_before(jiffies, until))
			mdelay(1);

		if (WARN_ON(!completion_done(&elem->complete)))
			err = -EINVAL;
	}

	if (pool->cleanup)
		pool->cleanup(elem);

	if (pool->type == RXE_TYPE_MR)
		kfree_rcu(elem->obj);

	atomic_dec(&pool->num_elem);

	return err;
}

/* take a reference on an object unless its refcount has already
 * dropped to zero
 */
int __rxe_get(struct rxe_pool_elem *elem)
{
	return kref_get_unless_zero(&elem->ref_cnt);
}

/* drop a reference; returns nonzero if this was the last one */
int __rxe_put(struct rxe_pool_elem *elem)
{
	return kref_put(&elem->ref_cnt, rxe_elem_release);
}

/* publish the element in the xarray so that rxe_pool_get_index()
 * can find the object
 */
void __rxe_finalize(struct rxe_pool_elem *elem)
{
	void *xa_ret;

	xa_ret = xa_store(&elem->pool->xa, elem->index, elem, GFP_KERNEL);
	WARN_ON(xa_err(xa_ret));
}
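
/*
 * A minimal (hypothetical) sketch of the two-step MR allocation flow,
 * assuming the rxe_finalize() wrapper from rxe_pool.h and the mr_pool
 * field of struct rxe_dev:
 *
 *	struct rxe_mr *mr = rxe_alloc(&rxe->mr_pool);
 *
 *	if (!mr)
 *		return ERR_PTR(-ENOMEM);
 *	... initialize mr ...
 *	rxe_finalize(mr);	(index lookups can now see the MR)
 */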