/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * RDMA Transport Layer
 *
 * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved.
 * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved.
 * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved.
 */

#ifndef RTRS_PRI_H
#define RTRS_PRI_H

#include <linux/uuid.h>
#include <rdma/rdma_cm.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib.h>

#include "rtrs.h"

#define RTRS_PROTO_VER_MAJOR 2
#define RTRS_PROTO_VER_MINOR 0

#define RTRS_PROTO_VER_STRING __stringify(RTRS_PROTO_VER_MAJOR) "." \
			      __stringify(RTRS_PROTO_VER_MINOR)

/*
 * Max IB immediate data size is 2^28 (MAX_IMM_PAYL_BITS)
 * and the minimum chunk size is 4096 (2^12).
 * So the maximum sess_queue_depth is 65536 (2^16) in theory.
 * But mempool_create(), create_qp() and ib_post_send() fail with
 * a "cannot allocate memory" error if sess_queue_depth is too big.
 * Therefore the practical max value of sess_queue_depth is
 * somewhere between 1 and 65534, and it depends on the system.
 */
#define MAX_SESS_QUEUE_DEPTH 65535

enum rtrs_imm_const {
	MAX_IMM_TYPE_BITS = 4,
	MAX_IMM_TYPE_MASK = ((1 << MAX_IMM_TYPE_BITS) - 1),
	MAX_IMM_PAYL_BITS = 28,
	MAX_IMM_PAYL_MASK = ((1 << MAX_IMM_PAYL_BITS) - 1),
};
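
/*
 * Illustrative layout of a 32-bit immediate word under the constants
 * above (a sketch added for clarity): the high MAX_IMM_TYPE_BITS carry
 * the rtrs_imm_type, the low MAX_IMM_PAYL_BITS carry the payload.
 *
 *  31          28 27                               0
 * +--------------+----------------------------------+
 * | type (4 bit) |         payload (28 bit)         |
 * +--------------+----------------------------------+
 *
 * See rtrs_to_imm()/rtrs_from_imm() below for the packing helpers.
 */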

enum rtrs_imm_type {
	RTRS_IO_REQ_IMM = 0, /* client to server */
	RTRS_IO_RSP_IMM = 1, /* server to client */
	RTRS_IO_RSP_W_INV_IMM = 2, /* server to client */

	RTRS_HB_MSG_IMM = 8, /* HB: HeartBeat */
	RTRS_HB_ACK_IMM = 9,

	RTRS_LAST_IMM,
};

enum {
	SERVICE_CON_QUEUE_DEPTH = 512,

	MAX_PATHS_NUM = 128,

	MIN_CHUNK_SIZE = 8192,

	RTRS_HB_INTERVAL_MS = 5000,
	RTRS_HB_MISSED_MAX = 5,

	RTRS_MAGIC = 0x1BBD,
	RTRS_PROTO_VER = (RTRS_PROTO_VER_MAJOR << 8) | RTRS_PROTO_VER_MINOR,
};

struct rtrs_ib_dev;

struct rtrs_rdma_dev_pd_ops {
	struct rtrs_ib_dev *(*alloc)(void);
	void (*free)(struct rtrs_ib_dev *dev);
	int (*init)(struct rtrs_ib_dev *dev);
	void (*deinit)(struct rtrs_ib_dev *dev);
};

struct rtrs_rdma_dev_pd {
	struct mutex mutex;
	struct list_head list;
	enum ib_pd_flags pd_flags;
	const struct rtrs_rdma_dev_pd_ops *ops;
};

struct rtrs_ib_dev {
	struct ib_device *ib_dev;
	struct ib_pd *ib_pd;
	struct kref ref;
	struct list_head entry;
	struct rtrs_rdma_dev_pd *pool;
};

struct rtrs_con {
	struct rtrs_path *path;
	struct ib_qp *qp;
	struct ib_cq *cq;
	struct rdma_cm_id *cm_id;
	unsigned int cid;
	int nr_cqe;
	atomic_t wr_cnt;
	atomic_t sq_wr_avail;
};

struct rtrs_path {
	struct list_head entry;
	struct sockaddr_storage dst_addr;
	struct sockaddr_storage src_addr;
	char sessname[NAME_MAX];
	uuid_t uuid;
	struct rtrs_con **con;
	unsigned int con_num;
	unsigned int irq_con_num;
	unsigned int recon_cnt;
	unsigned int signal_interval;
	struct rtrs_ib_dev *dev;
	int dev_ref;
	struct ib_cqe *hb_cqe;
	void (*hb_err_handler)(struct rtrs_con *con);
	struct workqueue_struct *hb_wq;
	struct delayed_work hb_dwork;
	unsigned int hb_interval_ms;
	unsigned int hb_missed_cnt;
	unsigned int hb_missed_max;
	ktime_t hb_last_sent;
	ktime_t hb_cur_latency;
};

/* rtrs information unit */
struct rtrs_iu {
	struct ib_cqe cqe;
	dma_addr_t dma_addr;
	void *buf;
	size_t size;
	enum dma_data_direction direction;
};

/**
 * enum rtrs_msg_types - RTRS message types, see also rtrs/README
 * @RTRS_MSG_INFO_REQ: Client additional info request to the server
 * @RTRS_MSG_INFO_RSP: Server additional info response to the client
 * @RTRS_MSG_WRITE: Client writes data per RDMA to server
 * @RTRS_MSG_READ: Client requests data transfer from server
 * @RTRS_MSG_RKEY_RSP: Server refreshed rkey for rbuf
 */
enum rtrs_msg_types {
	RTRS_MSG_INFO_REQ,
	RTRS_MSG_INFO_RSP,
	RTRS_MSG_WRITE,
	RTRS_MSG_READ,
	RTRS_MSG_RKEY_RSP,
};

/**
 * enum rtrs_msg_flags - RTRS message flags.
 * @RTRS_MSG_NEED_INVAL_F: Send invalidation in response.
 * @RTRS_MSG_NEW_RKEY_F: Send refreshed rkey in response.
 */
enum rtrs_msg_flags {
	RTRS_MSG_NEED_INVAL_F = 1 << 0,
	RTRS_MSG_NEW_RKEY_F = 1 << 1,
};

/**
 * struct rtrs_sg_desc - RDMA-Buffer entry description
 * @addr: Address of RDMA destination buffer
 * @key: Authorization rkey to write to the buffer
 * @len: Size of the buffer
 */
struct rtrs_sg_desc {
	__le64 addr;
	__le32 key;
	__le32 len;
};

/**
 * struct rtrs_msg_conn_req - Client connection request to the server
 * @magic: RTRS magic
 * @version: RTRS protocol version
 * @cid: Current connection id
 * @cid_num: Number of connections per session
 * @recon_cnt: Reconnections counter
 * @sess_uuid: UUID of a session (path)
 * @paths_uuid: UUID of a group of sessions (paths)
 * @first_conn: Set if this is the first connection of the session
 *
 * NOTE: max size 56 bytes, see man rdma_connect().
 */
struct rtrs_msg_conn_req {
	/* This is set to 0 by cma.c in the AF_IB case; do not touch it.
	 * See https://www.spinics.net/lists/linux-rdma/msg22397.html
	 */
	u8 __cma_version;
	/* On the sender side this must be set to 0, or cma_save_ip_info()
	 * extracts garbage and will fail.
	 */
	u8 __ip_version;
	__le16 magic;
	__le16 version;
	__le16 cid;
	__le16 cid_num;
	__le16 recon_cnt;
	uuid_t sess_uuid;
	uuid_t paths_uuid;
	u8 first_conn : 1;
	u8 reserved_bits : 7;
	u8 reserved[11];
};
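
/*
 * A compile-time sanity check sketch (illustrative, not part of the
 * original header; assumes static_assert() from <linux/build_bug.h> is
 * available): rdma_connect() limits private_data to 56 bytes, so the
 * connection request must never outgrow that.
 */
static_assert(sizeof(struct rtrs_msg_conn_req) <= 56);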

/**
 * struct rtrs_msg_conn_rsp - Server connection response to the client
 * @magic: RTRS magic
 * @version: RTRS protocol version
 * @errno: 0 if the connection was accepted via rdma_accept(), otherwise
 *         the error reported via rdma_reject()
 * @queue_depth: max inflight messages (queue-depth) in this session
 * @max_io_size: max io size server supports
 * @max_hdr_size: max msg header size server supports
 * @flags: RTRS message flags, see enum rtrs_msg_flags
 *
 * NOTE: size is 56 bytes, max possible is 136 bytes, see man rdma_accept().
 */
struct rtrs_msg_conn_rsp {
	__le16 magic;
	__le16 version;
	__le16 errno;
	__le16 queue_depth;
	__le32 max_io_size;
	__le32 max_hdr_size;
	__le32 flags;
	u8 reserved[36];
};
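
/*
 * The matching check for the response (again an illustrative sketch):
 * rdma_accept() allows up to 136 bytes of private_data.
 */
static_assert(sizeof(struct rtrs_msg_conn_rsp) <= 136);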

/**
 * struct rtrs_msg_info_req
 * @type: @RTRS_MSG_INFO_REQ
 * @pathname: Path name chosen by client
 */
struct rtrs_msg_info_req {
	__le16 type;
	u8 pathname[NAME_MAX];
	u8 reserved[15];
};

/**
 * struct rtrs_msg_info_rsp
 * @type: @RTRS_MSG_INFO_RSP
 * @sg_cnt: Number of @desc entries
 * @desc: RDMA buffers where the client can write to server
 */
struct rtrs_msg_info_rsp {
	__le16 type;
	__le16 sg_cnt;
	u8 reserved[4];
	struct rtrs_sg_desc desc[];
};

/**
 * struct rtrs_msg_rkey_rsp
 * @type: @RTRS_MSG_RKEY_RSP
 * @buf_id: RDMA buf_id of the new rkey
 * @rkey: new remote key for RDMA buffers id from server
 */
struct rtrs_msg_rkey_rsp {
	__le16 type;
	__le16 buf_id;
	__le32 rkey;
};

/**
 * struct rtrs_msg_rdma_read - RDMA data transfer request from client
 * @type: always @RTRS_MSG_READ
 * @usr_len: length of user payload
 * @flags: RTRS message flags, see enum rtrs_msg_flags
 * @sg_cnt: number of @desc entries
 * @desc: RDMA buffers where the server can write the result to
 */
struct rtrs_msg_rdma_read {
	__le16 type;
	__le16 usr_len;
	__le16 flags;
	__le16 sg_cnt;
	struct rtrs_sg_desc desc[];
};

/**
 * struct rtrs_msg_rdma_write - Message transferred to server with RDMA-Write
 * @type: always @RTRS_MSG_WRITE
 * @usr_len: length of user payload
 */
struct rtrs_msg_rdma_write {
	__le16 type;
	__le16 usr_len;
};

/**
 * struct rtrs_msg_rdma_hdr - header for read or write request
 * @type: @RTRS_MSG_WRITE | @RTRS_MSG_READ
 */
struct rtrs_msg_rdma_hdr {
	__le16 type;
};
/* rtrs.c */

struct rtrs_iu *rtrs_iu_alloc(u32 queue_num, size_t size, gfp_t gfp_mask,
			      struct ib_device *dev, enum dma_data_direction,
			      void (*done)(struct ib_cq *cq, struct ib_wc *wc));
void rtrs_iu_free(struct rtrs_iu *iu, struct ib_device *dev, u32 queue_num);
int rtrs_iu_post_recv(struct rtrs_con *con, struct rtrs_iu *iu);
int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size,
		      struct ib_send_wr *head);
int rtrs_iu_post_rdma_write_imm(struct rtrs_con *con, struct rtrs_iu *iu,
				struct ib_sge *sge, unsigned int num_sge,
				u32 rkey, u64 rdma_addr, u32 imm_data,
				enum ib_send_flags flags,
				struct ib_send_wr *head,
				struct ib_send_wr *tail);

int rtrs_post_recv_empty(struct rtrs_con *con, struct ib_cqe *cqe);

int rtrs_cq_qp_create(struct rtrs_path *path, struct rtrs_con *con,
		      u32 max_send_sge, int cq_vector, int nr_cqe,
		      u32 max_send_wr, u32 max_recv_wr,
		      enum ib_poll_context poll_ctx);
void rtrs_cq_qp_destroy(struct rtrs_con *con);

void rtrs_init_hb(struct rtrs_path *path, struct ib_cqe *cqe,
		  unsigned int interval_ms, unsigned int missed_max,
		  void (*err_handler)(struct rtrs_con *con),
		  struct workqueue_struct *wq);
void rtrs_start_hb(struct rtrs_path *path);
void rtrs_stop_hb(struct rtrs_path *path);
void rtrs_send_hb_ack(struct rtrs_path *path);

void rtrs_rdma_dev_pd_init(enum ib_pd_flags pd_flags,
			   struct rtrs_rdma_dev_pd *pool);
void rtrs_rdma_dev_pd_deinit(struct rtrs_rdma_dev_pd *pool);

struct rtrs_ib_dev *rtrs_ib_dev_find_or_add(struct ib_device *ib_dev,
					    struct rtrs_rdma_dev_pd *pool);
int rtrs_ib_dev_put(struct rtrs_ib_dev *dev);

static inline u32 rtrs_to_imm(u32 type, u32 payload)
{
	BUILD_BUG_ON(MAX_IMM_PAYL_BITS + MAX_IMM_TYPE_BITS != 32);
	BUILD_BUG_ON(RTRS_LAST_IMM > (1 << MAX_IMM_TYPE_BITS));
	return ((type & MAX_IMM_TYPE_MASK) << MAX_IMM_PAYL_BITS) |
	       (payload & MAX_IMM_PAYL_MASK);
}
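
/*
 * Worked example (illustrative only): RTRS_HB_MSG_IMM is 8, so
 * rtrs_to_imm(RTRS_HB_MSG_IMM, 0x123) yields (8 << 28) | 0x123 ==
 * 0x80000123, and rtrs_from_imm(0x80000123, &type, &payload) recovers
 * type == 8 and payload == 0x123.
 */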

static inline void rtrs_from_imm(u32 imm, u32 *type, u32 *payload)
{
	*payload = imm & MAX_IMM_PAYL_MASK;
	*type = imm >> MAX_IMM_PAYL_BITS;
}

static inline u32 rtrs_to_io_req_imm(u32 addr)
{
	return rtrs_to_imm(RTRS_IO_REQ_IMM, addr);
}

static inline u32 rtrs_to_io_rsp_imm(u32 msg_id, int errno, bool w_inval)
{
	enum rtrs_imm_type type;
	u32 payload;

	/* 9 bits for errno, 19 bits for msg_id */
	payload = (abs(errno) & 0x1ff) << 19 | (msg_id & 0x7ffff);
	type = w_inval ? RTRS_IO_RSP_W_INV_IMM : RTRS_IO_RSP_IMM;

	return rtrs_to_imm(type, payload);
}

static inline void rtrs_from_io_rsp_imm(u32 payload, u32 *msg_id, int *errno)
{
	/* 9 bits for errno, 19 bits for msg_id */
	*msg_id = payload & 0x7ffff;
	*errno = -(int)((payload >> 19) & 0x1ff);
}
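
/*
 * Worked example for the errno/msg_id packing above (illustrative
 * only): for msg_id == 7 and errno == -EIO (-5),
 * rtrs_to_io_rsp_imm(7, -5, false) packs payload (5 << 19) | 7 ==
 * 0x280007 into an RTRS_IO_RSP_IMM immediate, and
 * rtrs_from_io_rsp_imm(0x280007, &msg_id, &errno) recovers
 * msg_id == 7 and errno == -5.
 */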

#define STAT_STORE_FUNC(type, set_value, reset)				\
static ssize_t set_value##_store(struct kobject *kobj,			\
				 struct kobj_attribute *attr,		\
				 const char *buf, size_t count)		\
{									\
	int ret = -EINVAL;						\
	type *stats = container_of(kobj, type, kobj_stats);		\
									\
	if (sysfs_streq(buf, "1"))					\
		ret = reset(stats, true);				\
	else if (sysfs_streq(buf, "0"))					\
		ret = reset(stats, false);				\
	if (ret)							\
		return ret;						\
									\
	return count;							\
}

#define STAT_SHOW_FUNC(type, get_value, print)				\
static ssize_t get_value##_show(struct kobject *kobj,			\
				struct kobj_attribute *attr,		\
				char *page)				\
{									\
	type *stats = container_of(kobj, type, kobj_stats);		\
									\
	return print(stats, page);					\
}

#define STAT_ATTR(type, stat, print, reset)				\
STAT_STORE_FUNC(type, stat, reset)					\
STAT_SHOW_FUNC(type, stat, print)					\
static struct kobj_attribute stat##_attr = __ATTR_RW(stat)
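
/*
 * Usage sketch for STAT_ATTR() (hypothetical names, not defined in this
 * header): given a stats type that embeds a struct kobject named
 * kobj_stats, plus print/reset helpers such as
 *
 *	ssize_t rdma_stats_to_str(struct my_stats *stats, char *page);
 *	int rdma_stats_reset(struct my_stats *stats, bool enable);
 *
 * a read-write "rdma" sysfs attribute can be declared with:
 *
 *	STAT_ATTR(struct my_stats, rdma,
 *		  rdma_stats_to_str, rdma_stats_reset);
 */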

#endif /* RTRS_PRI_H */