1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2018 Chelsio Communications, Inc.
4  *
5  * Written by: Atul Gupta (atul.gupta@chelsio.com)
6  */
7 
8 #include <linux/module.h>
9 #include <linux/list.h>
10 #include <linux/workqueue.h>
11 #include <linux/skbuff.h>
12 #include <linux/timer.h>
13 #include <linux/notifier.h>
14 #include <linux/inetdevice.h>
15 #include <linux/ip.h>
16 #include <linux/tcp.h>
17 #include <linux/sched/signal.h>
18 #include <net/tcp.h>
19 #include <net/busy_poll.h>
20 #include <crypto/aes.h>
21 
22 #include "chtls.h"
23 #include "chtls_cm.h"
24 
is_tls_tx(struct chtls_sock * csk)25 static bool is_tls_tx(struct chtls_sock *csk)
26 {
27 	return csk->tlshws.txkey >= 0;
28 }
29 
is_tls_rx(struct chtls_sock * csk)30 static bool is_tls_rx(struct chtls_sock *csk)
31 {
32 	return csk->tlshws.rxkey >= 0;
33 }
34 
data_sgl_len(const struct sk_buff * skb)35 static int data_sgl_len(const struct sk_buff *skb)
36 {
37 	unsigned int cnt;
38 
39 	cnt = skb_shinfo(skb)->nr_frags;
40 	return sgl_len(cnt) * 8;
41 }
42 
nos_ivs(struct sock * sk,unsigned int size)43 static int nos_ivs(struct sock *sk, unsigned int size)
44 {
45 	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
46 
47 	return DIV_ROUND_UP(size, csk->tlshws.mfs);
48 }
49 
set_ivs_imm(struct sock * sk,const struct sk_buff * skb)50 static int set_ivs_imm(struct sock *sk, const struct sk_buff *skb)
51 {
52 	int ivs_size = nos_ivs(sk, skb->len) * CIPHER_BLOCK_SIZE;
53 	int hlen = TLS_WR_CPL_LEN + data_sgl_len(skb);
54 
55 	if ((hlen + KEY_ON_MEM_SZ + ivs_size) <
56 	    MAX_IMM_OFLD_TX_DATA_WR_LEN) {
57 		ULP_SKB_CB(skb)->ulp.tls.iv = 1;
58 		return 1;
59 	}
60 	ULP_SKB_CB(skb)->ulp.tls.iv = 0;
61 	return 0;
62 }
63 
max_ivs_size(struct sock * sk,int size)64 static int max_ivs_size(struct sock *sk, int size)
65 {
66 	return nos_ivs(sk, size) * CIPHER_BLOCK_SIZE;
67 }
68 
ivs_size(struct sock * sk,const struct sk_buff * skb)69 static int ivs_size(struct sock *sk, const struct sk_buff *skb)
70 {
71 	return set_ivs_imm(sk, skb) ? (nos_ivs(sk, skb->len) *
72 		 CIPHER_BLOCK_SIZE) : 0;
73 }
74 
flowc_wr_credits(int nparams,int * flowclenp)75 static int flowc_wr_credits(int nparams, int *flowclenp)
76 {
77 	int flowclen16, flowclen;
78 
79 	flowclen = offsetof(struct fw_flowc_wr, mnemval[nparams]);
80 	flowclen16 = DIV_ROUND_UP(flowclen, 16);
81 	flowclen = flowclen16 * 16;
82 
83 	if (flowclenp)
84 		*flowclenp = flowclen;
85 
86 	return flowclen16;
87 }
88 
create_flowc_wr_skb(struct sock * sk,struct fw_flowc_wr * flowc,int flowclen)89 static struct sk_buff *create_flowc_wr_skb(struct sock *sk,
90 					   struct fw_flowc_wr *flowc,
91 					   int flowclen)
92 {
93 	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
94 	struct sk_buff *skb;
95 
96 	skb = alloc_skb(flowclen, GFP_ATOMIC);
97 	if (!skb)
98 		return NULL;
99 
100 	__skb_put_data(skb, flowc, flowclen);
101 	skb_set_queue_mapping(skb, (csk->txq_idx << 1) | CPL_PRIORITY_DATA);
102 
103 	return skb;
104 }
105 
send_flowc_wr(struct sock * sk,struct fw_flowc_wr * flowc,int flowclen)106 static int send_flowc_wr(struct sock *sk, struct fw_flowc_wr *flowc,
107 			 int flowclen)
108 {
109 	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
110 	struct tcp_sock *tp = tcp_sk(sk);
111 	struct sk_buff *skb;
112 	int flowclen16;
113 	int ret;
114 
115 	flowclen16 = flowclen / 16;
116 
117 	if (csk_flag(sk, CSK_TX_DATA_SENT)) {
118 		skb = create_flowc_wr_skb(sk, flowc, flowclen);
119 		if (!skb)
120 			return -ENOMEM;
121 
122 		skb_entail(sk, skb,
123 			   ULPCB_FLAG_NO_HDR | ULPCB_FLAG_NO_APPEND);
124 		return 0;
125 	}
126 
127 	ret = cxgb4_immdata_send(csk->egress_dev,
128 				 csk->txq_idx,
129 				 flowc, flowclen);
130 	if (!ret)
131 		return flowclen16;
132 	skb = create_flowc_wr_skb(sk, flowc, flowclen);
133 	if (!skb)
134 		return -ENOMEM;
135 	send_or_defer(sk, tp, skb, 0);
136 	return flowclen16;
137 }
138 
/*
 * Translate a TCP socket state into the matching FW_FLOWC_MNEM_TCPSTATE_*
 * value.  States without a dedicated mapping are reported as ESTABLISHED.
 */
static u8 tcp_state_to_flowc_state(u8 state)
{
	u8 flowc_state;

	switch (state) {
	case TCP_CLOSE_WAIT:
		flowc_state = FW_FLOWC_MNEM_TCPSTATE_CLOSEWAIT;
		break;
	case TCP_FIN_WAIT1:
		flowc_state = FW_FLOWC_MNEM_TCPSTATE_FINWAIT1;
		break;
	case TCP_CLOSING:
		flowc_state = FW_FLOWC_MNEM_TCPSTATE_CLOSING;
		break;
	case TCP_LAST_ACK:
		flowc_state = FW_FLOWC_MNEM_TCPSTATE_LASTACK;
		break;
	case TCP_FIN_WAIT2:
		flowc_state = FW_FLOWC_MNEM_TCPSTATE_FINWAIT2;
		break;
	case TCP_ESTABLISHED:
	default:
		flowc_state = FW_FLOWC_MNEM_TCPSTATE_ESTABLISHED;
		break;
	}

	return flowc_state;
}
158 
/*
 * Build a FLOWC work request describing the connection and send it via
 * send_flowc_wr().
 *
 * @sk:      socket whose chtls/TCP state is described
 * @compl:   value placed in the WR completion field
 * @snd_nxt: not read by this function; SNDNXT is taken from tcp_sk(sk)
 * @rcv_nxt: not read by this function; RCVNXT is taken from tcp_sk(sk)
 *
 * RCV_SCALE, ULD_MODE and TXDATAPLEN_MAX are only included when the
 * corresponding value is non-zero / the ULP mode is TLS.
 *
 * Returns whatever send_flowc_wr() returns.
 */
int send_tx_flowc_wr(struct sock *sk, int compl,
		     u32 snd_nxt, u32 rcv_nxt)
{
	struct flowc_packed {
		struct fw_flowc_wr fc;
		struct fw_flowc_mnemval mnemval[FW_FLOWC_MNEM_MAX];
	} __packed sflowc;
	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct fw_flowc_wr *flowc = &sflowc.fc;
	int flowclen16, flowclen;
	int nparams = 0;

	memset(&sflowc, 0, sizeof(sflowc));

	/* Append one mnemonic/value pair and advance the parameter count. */
#define ADD_FLOWC_MNEM(__m, __v) \
	do { \
		flowc->mnemval[nparams].mnemonic = FW_FLOWC_MNEM_##__m; \
		flowc->mnemval[nparams].val = cpu_to_be32(__v); \
		nparams++; \
	} while (0)

	ADD_FLOWC_MNEM(PFNVFN, FW_PFVF_CMD_PFN_V(csk->cdev->lldi->pf));
	ADD_FLOWC_MNEM(CH, csk->tx_chan);
	ADD_FLOWC_MNEM(PORT, csk->tx_chan);
	ADD_FLOWC_MNEM(IQID, csk->rss_qid);
	ADD_FLOWC_MNEM(SNDNXT, tp->snd_nxt);
	ADD_FLOWC_MNEM(RCVNXT, tp->rcv_nxt);
	ADD_FLOWC_MNEM(SNDBUF, csk->sndbuf);
	ADD_FLOWC_MNEM(MSS, tp->mss_cache);
	ADD_FLOWC_MNEM(TCPSTATE, tcp_state_to_flowc_state(sk->sk_state));

	if (SND_WSCALE(tp))
		ADD_FLOWC_MNEM(RCV_SCALE, SND_WSCALE(tp));

	if (csk->ulp_mode == ULP_MODE_TLS)
		ADD_FLOWC_MNEM(ULD_MODE, ULP_MODE_TLS);

	if (csk->tlshws.fcplenmax)
		ADD_FLOWC_MNEM(TXDATAPLEN_MAX, csk->tlshws.fcplenmax);

#undef ADD_FLOWC_MNEM

	flowclen16 = flowc_wr_credits(nparams, &flowclen);
	flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) |
					   FW_WR_COMPL_V(compl) |
					   FW_FLOWC_WR_NPARAMS_V(nparams));
	flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(flowclen16) |
					  FW_WR_FLOWID_V(csk->tid));

	return send_flowc_wr(sk, flowc, flowclen);
}
217 
218 /* Copy IVs to WR */
tls_copy_ivs(struct sock * sk,struct sk_buff * skb)219 static int tls_copy_ivs(struct sock *sk, struct sk_buff *skb)
220 
221 {
222 	struct chtls_sock *csk;
223 	unsigned char *iv_loc;
224 	struct chtls_hws *hws;
225 	unsigned char *ivs;
226 	u16 number_of_ivs;
227 	struct page *page;
228 	int err = 0;
229 
230 	csk = rcu_dereference_sk_user_data(sk);
231 	hws = &csk->tlshws;
232 	number_of_ivs = nos_ivs(sk, skb->len);
233 
234 	if (number_of_ivs > MAX_IVS_PAGE) {
235 		pr_warn("MAX IVs in PAGE exceeded %d\n", number_of_ivs);
236 		return -ENOMEM;
237 	}
238 
239 	/* generate the  IVs */
240 	ivs = kmalloc_array(CIPHER_BLOCK_SIZE, number_of_ivs, GFP_ATOMIC);
241 	if (!ivs)
242 		return -ENOMEM;
243 	get_random_bytes(ivs, number_of_ivs * CIPHER_BLOCK_SIZE);
244 
245 	if (skb_ulp_tls_iv_imm(skb)) {
246 		/* send the IVs as immediate data in the WR */
247 		iv_loc = (unsigned char *)__skb_push(skb, number_of_ivs *
248 						CIPHER_BLOCK_SIZE);
249 		if (iv_loc)
250 			memcpy(iv_loc, ivs, number_of_ivs * CIPHER_BLOCK_SIZE);
251 
252 		hws->ivsize = number_of_ivs * CIPHER_BLOCK_SIZE;
253 	} else {
254 		/* Send the IVs as sgls */
255 		/* Already accounted IV DSGL for credits */
256 		skb_shinfo(skb)->nr_frags--;
257 		page = alloc_pages(sk->sk_allocation | __GFP_COMP, 0);
258 		if (!page) {
259 			pr_info("%s : Page allocation for IVs failed\n",
260 				__func__);
261 			err = -ENOMEM;
262 			goto out;
263 		}
264 		memcpy(page_address(page), ivs, number_of_ivs *
265 		       CIPHER_BLOCK_SIZE);
266 		skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, page, 0,
267 				   number_of_ivs * CIPHER_BLOCK_SIZE);
268 		hws->ivsize = 0;
269 	}
270 out:
271 	kfree(ivs);
272 	return err;
273 }
274 
275 /* Copy Key to WR */
tls_copy_tx_key(struct sock * sk,struct sk_buff * skb)276 static void tls_copy_tx_key(struct sock *sk, struct sk_buff *skb)
277 {
278 	struct ulptx_sc_memrd *sc_memrd;
279 	struct chtls_sock *csk;
280 	struct chtls_dev *cdev;
281 	struct ulptx_idata *sc;
282 	struct chtls_hws *hws;
283 	u32 immdlen;
284 	int kaddr;
285 
286 	csk = rcu_dereference_sk_user_data(sk);
287 	hws = &csk->tlshws;
288 	cdev = csk->cdev;
289 
290 	immdlen = sizeof(*sc) + sizeof(*sc_memrd);
291 	kaddr = keyid_to_addr(cdev->kmap.start, hws->txkey);
292 	sc = (struct ulptx_idata *)__skb_push(skb, immdlen);
293 	if (sc) {
294 		sc->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_NOOP));
295 		sc->len = htonl(0);
296 		sc_memrd = (struct ulptx_sc_memrd *)(sc + 1);
297 		sc_memrd->cmd_to_len =
298 				htonl(ULPTX_CMD_V(ULP_TX_SC_MEMRD) |
299 				ULP_TX_SC_MORE_V(1) |
300 				ULPTX_LEN16_V(hws->keylen >> 4));
301 		sc_memrd->addr = htonl(kaddr);
302 	}
303 }
304 
tlstx_incr_seqnum(struct chtls_hws * hws)305 static u64 tlstx_incr_seqnum(struct chtls_hws *hws)
306 {
307 	return hws->tx_seq_no++;
308 }
309 
is_sg_request(const struct sk_buff * skb)310 static bool is_sg_request(const struct sk_buff *skb)
311 {
312 	return skb->peeked ||
313 		(skb->len > MAX_IMM_ULPTX_WR_LEN);
314 }
315 
316 /*
317  * Returns true if an sk_buff carries urgent data.
318  */
skb_urgent(struct sk_buff * skb)319 static bool skb_urgent(struct sk_buff *skb)
320 {
321 	return ULP_SKB_CB(skb)->flags & ULPCB_FLAG_URG;
322 }
323 
324 /* TLS content type for CPL SFO */
tls_content_type(unsigned char content_type)325 static unsigned char tls_content_type(unsigned char content_type)
326 {
327 	switch (content_type) {
328 	case TLS_HDR_TYPE_CCS:
329 		return CPL_TX_TLS_SFO_TYPE_CCS;
330 	case TLS_HDR_TYPE_ALERT:
331 		return CPL_TX_TLS_SFO_TYPE_ALERT;
332 	case TLS_HDR_TYPE_HANDSHAKE:
333 		return CPL_TX_TLS_SFO_TYPE_HANDSHAKE;
334 	case TLS_HDR_TYPE_HEARTBEAT:
335 		return CPL_TX_TLS_SFO_TYPE_HEARTBEAT;
336 	}
337 	return CPL_TX_TLS_SFO_TYPE_DATA;
338 }
339 
/*
 * Prepend the FW_TLSTX_DATA_WR header and the CPL_TX_TLS_SFO to @skb.
 *
 * @dlen:     TLS payload length; the segment length written to the CPL
 *            is this value capped at hws->mfs
 * @tls_immd: non-zero when the payload travels as immediate data
 * @credits:  WR length in 16-byte units
 * @expn:     expansion bytes, added to @dlen for the WR 'plen' field
 * @pdus:     number of PDUs/IVs, stored in the cached scmd and in the WR
 *
 * Side effects: bumps the adapter's tls_pdu_tx counter, rewrites the low
 * seven bits of hws->scmd.seqno_numivs and advances hws->tx_seq_no by one.
 */
static void tls_tx_data_wr(struct sock *sk, struct sk_buff *skb,
			   int dlen, int tls_immd, u32 credits,
			   int expn, int pdus)
{
	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
	int iv_imm = skb_ulp_tls_iv_imm(skb);
	struct adapter *adap = netdev2adap(csk->egress_dev);
	struct chtls_hws *hws = &csk->tlshws;
	int total_len = dlen + expn;	/* uses the uncapped dlen */
	struct fw_tlstx_data_wr *wr;
	struct cpl_tx_tls_sfo *cpl;
	unsigned int ulp_flags;
	unsigned char sfo_type;
	int immd_len;
	int hb_type;

	if (!(dlen < hws->mfs))
		dlen = hws->mfs;
	atomic_inc(&adap->chcr_stats.tls_pdu_tx);

	/* Refresh the IV count kept in the cached s-command. */
	hws->scmd.seqno_numivs &= 0xffffff80;
	hws->scmd.seqno_numivs |= SCMD_NUM_IVS_V(pdus);

	/* The CPL goes in first so that the WR header ends up in front. */
	cpl = (struct cpl_tx_tls_sfo *)__skb_push(skb, sizeof(*cpl));
	wr = (struct fw_tlstx_data_wr *)__skb_push(skb, sizeof(*wr));

	immd_len = tls_immd ? dlen : 0;
	wr->op_to_immdlen = htonl(FW_WR_OP_V(FW_TLSTX_DATA_WR) |
				  FW_TLSTX_DATA_WR_COMPL_V(1) |
				  FW_TLSTX_DATA_WR_IMMDLEN_V(immd_len));
	wr->flowid_len16 = htonl(FW_TLSTX_DATA_WR_FLOWID_V(csk->tid) |
				 FW_TLSTX_DATA_WR_LEN16_V(credits));

	ulp_flags = TX_ULP_MODE_V(ULP_MODE_TLS);
	if (is_sg_request(skb)) {
		ulp_flags |= FW_OFLD_TX_DATA_WR_ALIGNPLD_F;
		if (!(tcp_sk(sk)->nonagle & TCP_NAGLE_OFF))
			ulp_flags |= FW_OFLD_TX_DATA_WR_SHOVE_F;
	}

	wr->lsodisable_to_flags =
		htonl(TX_ULP_MODE_V(ULP_MODE_TLS) |
		      TX_URG_V(skb_urgent(skb)) |
		      T6_TX_FORCE_F | ulp_flags |
		      TX_SHOVE_V((!csk_flag(sk, CSK_TX_MORE_DATA)) &&
				 skb_queue_empty(&csk->txq)));

	wr->ctxloc_to_exp =
		htonl(FW_TLSTX_DATA_WR_NUMIVS_V(pdus) |
		      FW_TLSTX_DATA_WR_EXP_V(expn) |
		      FW_TLSTX_DATA_WR_CTXLOC_V(CHTLS_KEY_CONTEXT_DDR) |
		      FW_TLSTX_DATA_WR_IVDSGL_V(!iv_imm) |
		      FW_TLSTX_DATA_WR_KEYSIZE_V(hws->keylen >> 4));

	/* Length related fields */
	wr->plen = htonl(total_len);
	wr->mfs = htons(hws->mfs);
	wr->adjustedplen_pkd =
		htons(FW_TLSTX_DATA_WR_ADJUSTEDPLEN_V(hws->adjustlen));
	wr->expinplenmax_pkd =
		htons(FW_TLSTX_DATA_WR_EXPINPLENMAX_V(hws->expansion));
	wr->pdusinplenmax_pkd = FW_TLSTX_DATA_WR_PDUSINPLENMAX_V(hws->pdus);
	wr->r10 = 0;

	sfo_type = tls_content_type(ULP_SKB_CB(skb)->ulp.tls.type);
	cpl->op_to_seg_len = htonl(CPL_TX_TLS_SFO_OPCODE_V(CPL_TX_TLS_SFO) |
				   CPL_TX_TLS_SFO_DATA_TYPE_V(sfo_type) |
				   CPL_TX_TLS_SFO_CPL_LEN_V(2) |
				   CPL_TX_TLS_SFO_SEG_LEN_V(dlen));
	cpl->pld_len = htonl(total_len - expn);

	/* Only heartbeat records carry an explicit type here. */
	hb_type = (sfo_type == CPL_TX_TLS_SFO_TYPE_HEARTBEAT) ?
		  TLS_HDR_TYPE_HEARTBEAT : 0;
	cpl->type_protover = htonl(CPL_TX_TLS_SFO_TYPE_V(hb_type) |
				   CPL_TX_TLS_SFO_PROTOVER_V(0));

	/* create the s-command */
	cpl->r1_lo = 0;
	cpl->seqno_numivs = cpu_to_be32(hws->scmd.seqno_numivs);
	cpl->ivgen_hdrlen = cpu_to_be32(hws->scmd.ivgen_hdrlen);
	cpl->scmd1 = cpu_to_be64(tlstx_incr_seqnum(hws));
}
438 
439 /*
440  * Calculate the TLS data expansion size
441  */
chtls_expansion_size(struct sock * sk,int data_len,int fullpdu,unsigned short * pducnt)442 static int chtls_expansion_size(struct sock *sk, int data_len,
443 				int fullpdu,
444 				unsigned short *pducnt)
445 {
446 	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
447 	struct chtls_hws *hws = &csk->tlshws;
448 	struct tls_scmd *scmd = &hws->scmd;
449 	int fragsize = hws->mfs;
450 	int expnsize = 0;
451 	int fragleft;
452 	int fragcnt;
453 	int expppdu;
454 
455 	if (SCMD_CIPH_MODE_G(scmd->seqno_numivs) ==
456 	    SCMD_CIPH_MODE_AES_GCM) {
457 		expppdu = GCM_TAG_SIZE + AEAD_EXPLICIT_DATA_SIZE +
458 			  TLS_HEADER_LENGTH;
459 
460 		if (fullpdu) {
461 			*pducnt = data_len / (expppdu + fragsize);
462 			if (*pducnt > 32)
463 				*pducnt = 32;
464 			else if (!*pducnt)
465 				*pducnt = 1;
466 			expnsize = (*pducnt) * expppdu;
467 			return expnsize;
468 		}
469 		fragcnt = (data_len / fragsize);
470 		expnsize =  fragcnt * expppdu;
471 		fragleft = data_len % fragsize;
472 		if (fragleft > 0)
473 			expnsize += expppdu;
474 	}
475 	return expnsize;
476 }
477 
478 /* WR with IV, KEY and CPL SFO added */
make_tlstx_data_wr(struct sock * sk,struct sk_buff * skb,int tls_tx_imm,int tls_len,u32 credits)479 static void make_tlstx_data_wr(struct sock *sk, struct sk_buff *skb,
480 			       int tls_tx_imm, int tls_len, u32 credits)
481 {
482 	unsigned short pdus_per_ulp = 0;
483 	struct chtls_sock *csk;
484 	struct chtls_hws *hws;
485 	int expn_sz;
486 	int pdus;
487 
488 	csk = rcu_dereference_sk_user_data(sk);
489 	hws = &csk->tlshws;
490 	pdus = DIV_ROUND_UP(tls_len, hws->mfs);
491 	expn_sz = chtls_expansion_size(sk, tls_len, 0, NULL);
492 	if (!hws->compute) {
493 		hws->expansion = chtls_expansion_size(sk,
494 						      hws->fcplenmax,
495 						      1, &pdus_per_ulp);
496 		hws->pdus = pdus_per_ulp;
497 		hws->adjustlen = hws->pdus *
498 			((hws->expansion / hws->pdus) + hws->mfs);
499 		hws->compute = 1;
500 	}
501 	if (tls_copy_ivs(sk, skb))
502 		return;
503 	tls_copy_tx_key(sk, skb);
504 	tls_tx_data_wr(sk, skb, tls_len, tls_tx_imm, credits, expn_sz, pdus);
505 	hws->tx_seq_no += (pdus - 1);
506 }
507 
/*
 * Prepend a FW_OFLD_TX_DATA_WR header to @skb.
 *
 * @immdlen: immediate data length placed in the WR
 * @len:     payload length written to the 'plen' field
 * @credits: WR length in 16-byte units
 * @compl:   value for the WR completion field
 *
 * The ALIGNPLD flag (and SHOVE, unless Nagle is off) is added when
 * is_sg_request() holds for the skb after the header has been pushed.
 */
static void make_tx_data_wr(struct sock *sk, struct sk_buff *skb,
			    unsigned int immdlen, int len,
			    u32 credits, u32 compl)
{
	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
	const unsigned int opcode = FW_OFLD_TX_DATA_WR;
	struct fw_ofld_tx_data_wr *wr;
	unsigned int ulp_flags;

	wr = (struct fw_ofld_tx_data_wr *)__skb_push(skb, sizeof(*wr));
	wr->op_to_immdlen = htonl(WR_OP_V(opcode) |
				  FW_WR_COMPL_V(compl) |
				  FW_WR_IMMDLEN_V(immdlen));
	wr->flowid_len16 = htonl(FW_WR_FLOWID_V(csk->tid) |
				 FW_WR_LEN16_V(credits));

	ulp_flags = TX_ULP_MODE_V(csk->ulp_mode);
	if (is_sg_request(skb)) {
		ulp_flags |= FW_OFLD_TX_DATA_WR_ALIGNPLD_F;
		if (!(tcp_sk(sk)->nonagle & TCP_NAGLE_OFF))
			ulp_flags |= FW_OFLD_TX_DATA_WR_SHOVE_F;
	}

	wr->tunnel_to_proxy =
		htonl(ulp_flags |
		      TX_URG_V(skb_urgent(skb)) |
		      TX_SHOVE_V((!csk_flag(sk, CSK_TX_MORE_DATA)) &&
				 skb_queue_empty(&csk->txq)));
	wr->plen = htonl(len);
}
539 
/*
 * Header bytes a TLS work request adds in front of @skb:
 * TLS_WR_CPL_LEN + KEY_ON_MEM_SZ + immediate IV bytes (ivs_size()).
 *
 * @size: when true only the size is computed.  When false, and the IVs
 *        are not immediate, the skb's nr_frags is additionally bumped by
 *        one to account for the IV DSGL; the caller is expected to undo
 *        or consume that extra count.
 */
static int chtls_wr_size(struct chtls_sock *csk, const struct sk_buff *skb,
			 bool size)
{
	int wr_size = TLS_WR_CPL_LEN + KEY_ON_MEM_SZ + ivs_size(csk->sk, skb);

	/* frags counted for IV dsgl */
	if (!size && !skb_ulp_tls_iv_imm(skb))
		skb_shinfo(skb)->nr_frags++;

	return wr_size;
}
558 
is_ofld_imm(struct chtls_sock * csk,const struct sk_buff * skb)559 static bool is_ofld_imm(struct chtls_sock *csk, const struct sk_buff *skb)
560 {
561 	int length = skb->len;
562 
563 	if (skb->peeked || skb->len > MAX_IMM_ULPTX_WR_LEN)
564 		return false;
565 
566 	if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) {
567 		/* Check TLS header len for Immediate */
568 		if (csk->ulp_mode == ULP_MODE_TLS &&
569 		    skb_ulp_tls_inline(skb))
570 			length += chtls_wr_size(csk, skb, true);
571 		else
572 			length += sizeof(struct fw_ofld_tx_data_wr);
573 
574 		return length <= MAX_IMM_OFLD_TX_DATA_WR_LEN;
575 	}
576 	return true;
577 }
578 
calc_tx_flits(const struct sk_buff * skb,unsigned int immdlen)579 static unsigned int calc_tx_flits(const struct sk_buff *skb,
580 				  unsigned int immdlen)
581 {
582 	unsigned int flits, cnt;
583 
584 	flits = immdlen / 8;   /* headers */
585 	cnt = skb_shinfo(skb)->nr_frags;
586 	if (skb_tail_pointer(skb) != skb_transport_header(skb))
587 		cnt++;
588 	return flits + sgl_len(cnt);
589 }
590 
/* ARP failure handler: drop the skb.  @handle is not used. */
static void arp_failure_discard(void *handle, struct sk_buff *skb)
{
	(void)handle;
	kfree_skb(skb);
}
595 
chtls_push_frames(struct chtls_sock * csk,int comp)596 int chtls_push_frames(struct chtls_sock *csk, int comp)
597 {
598 	struct chtls_hws *hws = &csk->tlshws;
599 	struct tcp_sock *tp;
600 	struct sk_buff *skb;
601 	int total_size = 0;
602 	struct sock *sk;
603 	int wr_size;
604 
605 	wr_size = sizeof(struct fw_ofld_tx_data_wr);
606 	sk = csk->sk;
607 	tp = tcp_sk(sk);
608 
609 	if (unlikely(sk_in_state(sk, TCPF_SYN_SENT | TCPF_CLOSE)))
610 		return 0;
611 
612 	if (unlikely(csk_flag(sk, CSK_ABORT_SHUTDOWN)))
613 		return 0;
614 
615 	while (csk->wr_credits && (skb = skb_peek(&csk->txq)) &&
616 	       (!(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_HOLD) ||
617 		skb_queue_len(&csk->txq) > 1)) {
618 		unsigned int credit_len = skb->len;
619 		unsigned int credits_needed;
620 		unsigned int completion = 0;
621 		int tls_len = skb->len;/* TLS data len before IV/key */
622 		unsigned int immdlen;
623 		int len = skb->len;    /* length [ulp bytes] inserted by hw */
624 		int flowclen16 = 0;
625 		int tls_tx_imm = 0;
626 
627 		immdlen = skb->len;
628 		if (!is_ofld_imm(csk, skb)) {
629 			immdlen = skb_transport_offset(skb);
630 			if (skb_ulp_tls_inline(skb))
631 				wr_size = chtls_wr_size(csk, skb, false);
632 			credit_len = 8 * calc_tx_flits(skb, immdlen);
633 		} else {
634 			if (skb_ulp_tls_inline(skb)) {
635 				wr_size = chtls_wr_size(csk, skb, false);
636 				tls_tx_imm = 1;
637 			}
638 		}
639 		if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR))
640 			credit_len += wr_size;
641 		credits_needed = DIV_ROUND_UP(credit_len, 16);
642 		if (!csk_flag_nochk(csk, CSK_TX_DATA_SENT)) {
643 			flowclen16 = send_tx_flowc_wr(sk, 1, tp->snd_nxt,
644 						      tp->rcv_nxt);
645 			if (flowclen16 <= 0)
646 				break;
647 			csk->wr_credits -= flowclen16;
648 			csk->wr_unacked += flowclen16;
649 			csk->wr_nondata += flowclen16;
650 			csk_set_flag(csk, CSK_TX_DATA_SENT);
651 		}
652 
653 		if (csk->wr_credits < credits_needed) {
654 			if (skb_ulp_tls_inline(skb) &&
655 			    !skb_ulp_tls_iv_imm(skb))
656 				skb_shinfo(skb)->nr_frags--;
657 			break;
658 		}
659 
660 		__skb_unlink(skb, &csk->txq);
661 		skb_set_queue_mapping(skb, (csk->txq_idx << 1) |
662 				      CPL_PRIORITY_DATA);
663 		if (hws->ofld)
664 			hws->txqid = (skb->queue_mapping >> 1);
665 		skb->csum = (__force __wsum)(credits_needed + csk->wr_nondata);
666 		csk->wr_credits -= credits_needed;
667 		csk->wr_unacked += credits_needed;
668 		csk->wr_nondata = 0;
669 		enqueue_wr(csk, skb);
670 
671 		if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) {
672 			if ((comp && csk->wr_unacked == credits_needed) ||
673 			    (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_COMPL) ||
674 			    csk->wr_unacked >= csk->wr_max_credits / 2) {
675 				completion = 1;
676 				csk->wr_unacked = 0;
677 			}
678 			if (skb_ulp_tls_inline(skb))
679 				make_tlstx_data_wr(sk, skb, tls_tx_imm,
680 						   tls_len, credits_needed);
681 			else
682 				make_tx_data_wr(sk, skb, immdlen, len,
683 						credits_needed, completion);
684 			tp->snd_nxt += len;
685 			tp->lsndtime = tcp_jiffies32;
686 			if (completion)
687 				ULP_SKB_CB(skb)->flags &= ~ULPCB_FLAG_NEED_HDR;
688 		} else {
689 			struct cpl_close_con_req *req = cplhdr(skb);
690 			unsigned int cmd  = CPL_OPCODE_G(ntohl
691 					     (OPCODE_TID(req)));
692 
693 			if (cmd == CPL_CLOSE_CON_REQ)
694 				csk_set_flag(csk,
695 					     CSK_CLOSE_CON_REQUESTED);
696 
697 			if ((ULP_SKB_CB(skb)->flags & ULPCB_FLAG_COMPL) &&
698 			    (csk->wr_unacked >= csk->wr_max_credits / 2)) {
699 				req->wr.wr_hi |= htonl(FW_WR_COMPL_F);
700 				csk->wr_unacked = 0;
701 			}
702 		}
703 		total_size += skb->truesize;
704 		if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_BARRIER)
705 			csk_set_flag(csk, CSK_TX_WAIT_IDLE);
706 		t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
707 		cxgb4_l2t_send(csk->egress_dev, skb, csk->l2t_entry);
708 	}
709 	sk->sk_wmem_queued -= total_size;
710 	return total_size;
711 }
712 
mark_urg(struct tcp_sock * tp,int flags,struct sk_buff * skb)713 static void mark_urg(struct tcp_sock *tp, int flags,
714 		     struct sk_buff *skb)
715 {
716 	if (unlikely(flags & MSG_OOB)) {
717 		tp->snd_up = tp->write_seq;
718 		ULP_SKB_CB(skb)->flags = ULPCB_FLAG_URG |
719 					 ULPCB_FLAG_BARRIER |
720 					 ULPCB_FLAG_NO_APPEND |
721 					 ULPCB_FLAG_NEED_HDR;
722 	}
723 }
724 
725 /*
726  * Returns true if a connection should send more data to TCP engine
727  */
should_push(struct sock * sk)728 static bool should_push(struct sock *sk)
729 {
730 	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
731 	struct chtls_dev *cdev = csk->cdev;
732 	struct tcp_sock *tp = tcp_sk(sk);
733 
734 	/*
735 	 * If we've released our offload resources there's nothing to do ...
736 	 */
737 	if (!cdev)
738 		return false;
739 
740 	/*
741 	 * If there aren't any work requests in flight, or there isn't enough
742 	 * data in flight, or Nagle is off then send the current TX_DATA
743 	 * otherwise hold it and wait to accumulate more data.
744 	 */
745 	return csk->wr_credits == csk->wr_max_credits ||
746 		(tp->nonagle & TCP_NAGLE_OFF);
747 }
748 
749 /*
750  * Returns true if a TCP socket is corked.
751  */
corked(const struct tcp_sock * tp,int flags)752 static bool corked(const struct tcp_sock *tp, int flags)
753 {
754 	return (flags & MSG_MORE) || (tp->nonagle & TCP_NAGLE_CORK);
755 }
756 
757 /*
758  * Returns true if a send should try to push new data.
759  */
send_should_push(struct sock * sk,int flags)760 static bool send_should_push(struct sock *sk, int flags)
761 {
762 	return should_push(sk) && !corked(tcp_sk(sk), flags);
763 }
764 
chtls_tcp_push(struct sock * sk,int flags)765 void chtls_tcp_push(struct sock *sk, int flags)
766 {
767 	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
768 	int qlen = skb_queue_len(&csk->txq);
769 
770 	if (likely(qlen)) {
771 		struct sk_buff *skb = skb_peek_tail(&csk->txq);
772 		struct tcp_sock *tp = tcp_sk(sk);
773 
774 		mark_urg(tp, flags, skb);
775 
776 		if (!(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) &&
777 		    corked(tp, flags)) {
778 			ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_HOLD;
779 			return;
780 		}
781 
782 		ULP_SKB_CB(skb)->flags &= ~ULPCB_FLAG_HOLD;
783 		if (qlen == 1 &&
784 		    ((ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) ||
785 		     should_push(sk)))
786 			chtls_push_frames(csk, 1);
787 	}
788 }
789 
/*
 * Calculate the linear-area size for a new send sk_buff.
 *
 * - Payload larger than SKB_MAX_HEAD(len): 0, so only the header lives
 *   in the main body (usable as immediate data) and all payload goes
 *   into page fragments.
 * - Data will be accumulated rather than pushed right away: the maximum,
 *   SKB_MAX_HEAD(len), so lots of data can be packed in.
 * - Otherwise: exactly @io_len, sized tightly for an immediate send.
 *
 * Note: we don't bother compensating for MSS < PAGE_SIZE because it
 * doesn't arise in normal cases and when it does we are just wasting
 * memory.
 */
static int select_size(struct sock *sk, int io_len, int flags, int len)
{
	const int max_head = SKB_MAX_HEAD(len);

	if (io_len > max_head)
		return 0;

	return send_should_push(sk, flags) ? io_len : max_head;
}
818 
skb_entail(struct sock * sk,struct sk_buff * skb,int flags)819 void skb_entail(struct sock *sk, struct sk_buff *skb, int flags)
820 {
821 	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
822 	struct tcp_sock *tp = tcp_sk(sk);
823 
824 	ULP_SKB_CB(skb)->seq = tp->write_seq;
825 	ULP_SKB_CB(skb)->flags = flags;
826 	__skb_queue_tail(&csk->txq, skb);
827 	sk->sk_wmem_queued += skb->truesize;
828 
829 	if (TCP_PAGE(sk) && TCP_OFF(sk)) {
830 		put_page(TCP_PAGE(sk));
831 		TCP_PAGE(sk) = NULL;
832 		TCP_OFF(sk) = 0;
833 	}
834 }
835 
get_tx_skb(struct sock * sk,int size)836 static struct sk_buff *get_tx_skb(struct sock *sk, int size)
837 {
838 	struct sk_buff *skb;
839 
840 	skb = alloc_skb(size + TX_HEADER_LEN, sk->sk_allocation);
841 	if (likely(skb)) {
842 		skb_reserve(skb, TX_HEADER_LEN);
843 		skb_entail(sk, skb, ULPCB_FLAG_NEED_HDR);
844 		skb_reset_transport_header(skb);
845 	}
846 	return skb;
847 }
848 
/*
 * Allocate a TX skb for a TLS record.
 *
 * Headroom of TX_TLSHDR_LEN + KEY_ON_MEM_SZ + max_ivs_size(sk, size) is
 * reserved for the headers added later.  The linear area additionally
 * holds @size payload bytes unless @zcopy is set.  The skb is queued on
 * the socket with ULPCB_FLAG_NEED_HDR, flagged as TLS offload and tagged
 * with the record type currently stored in csk->tlshws.type.
 *
 * Returns the skb, or NULL when the allocation fails.
 */
static struct sk_buff *get_record_skb(struct sock *sk, int size, bool zcopy)
{
	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
	int headroom = TX_TLSHDR_LEN + KEY_ON_MEM_SZ + max_ivs_size(sk, size);
	int payload = zcopy ? 0 : size;
	struct sk_buff *skb;

	skb = alloc_skb(payload + headroom, sk->sk_allocation);
	if (unlikely(!skb))
		return NULL;

	skb_reserve(skb, headroom);
	skb_entail(sk, skb, ULPCB_FLAG_NEED_HDR);
	skb_reset_transport_header(skb);
	ULP_SKB_CB(skb)->ulp.tls.ofld = 1;
	ULP_SKB_CB(skb)->ulp.tls.type = csk->tlshws.type;

	return skb;
}
867 
tx_skb_finalize(struct sk_buff * skb)868 static void tx_skb_finalize(struct sk_buff *skb)
869 {
870 	struct ulp_skb_cb *cb = ULP_SKB_CB(skb);
871 
872 	if (!(cb->flags & ULPCB_FLAG_NO_HDR))
873 		cb->flags = ULPCB_FLAG_NEED_HDR;
874 	cb->flags |= ULPCB_FLAG_NO_APPEND;
875 }
876 
push_frames_if_head(struct sock * sk)877 static void push_frames_if_head(struct sock *sk)
878 {
879 	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
880 
881 	if (skb_queue_len(&csk->txq) == 1)
882 		chtls_push_frames(csk, 1);
883 }
884 
chtls_skb_copy_to_page_nocache(struct sock * sk,struct iov_iter * from,struct sk_buff * skb,struct page * page,int off,int copy)885 static int chtls_skb_copy_to_page_nocache(struct sock *sk,
886 					  struct iov_iter *from,
887 					  struct sk_buff *skb,
888 					  struct page *page,
889 					  int off, int copy)
890 {
891 	int err;
892 
893 	err = skb_do_copy_data_nocache(sk, skb, from, page_address(page) +
894 				       off, copy, skb->len);
895 	if (err)
896 		return err;
897 
898 	skb->len             += copy;
899 	skb->data_len        += copy;
900 	skb->truesize        += copy;
901 	sk->sk_wmem_queued   += copy;
902 	return 0;
903 }
904 
csk_mem_free(struct chtls_dev * cdev,struct sock * sk)905 static bool csk_mem_free(struct chtls_dev *cdev, struct sock *sk)
906 {
907 	return (cdev->max_host_sndbuf - sk->sk_wmem_queued > 0);
908 }
909 
csk_wait_memory(struct chtls_dev * cdev,struct sock * sk,long * timeo_p)910 static int csk_wait_memory(struct chtls_dev *cdev,
911 			   struct sock *sk, long *timeo_p)
912 {
913 	DEFINE_WAIT_FUNC(wait, woken_wake_function);
914 	int err = 0;
915 	long current_timeo;
916 	long vm_wait = 0;
917 	bool noblock;
918 
919 	current_timeo = *timeo_p;
920 	noblock = (*timeo_p ? false : true);
921 	if (csk_mem_free(cdev, sk)) {
922 		current_timeo = prandom_u32_max(HZ / 5) + 2;
923 		vm_wait = prandom_u32_max(HZ / 5) + 2;
924 	}
925 
926 	add_wait_queue(sk_sleep(sk), &wait);
927 	while (1) {
928 		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
929 
930 		if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
931 			goto do_error;
932 		if (!*timeo_p) {
933 			if (noblock)
934 				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
935 			goto do_nonblock;
936 		}
937 		if (signal_pending(current))
938 			goto do_interrupted;
939 		sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
940 		if (csk_mem_free(cdev, sk) && !vm_wait)
941 			break;
942 
943 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
944 		sk->sk_write_pending++;
945 		sk_wait_event(sk, &current_timeo, sk->sk_err ||
946 			      (sk->sk_shutdown & SEND_SHUTDOWN) ||
947 			      (csk_mem_free(cdev, sk) && !vm_wait), &wait);
948 		sk->sk_write_pending--;
949 
950 		if (vm_wait) {
951 			vm_wait -= current_timeo;
952 			current_timeo = *timeo_p;
953 			if (current_timeo != MAX_SCHEDULE_TIMEOUT) {
954 				current_timeo -= vm_wait;
955 				if (current_timeo < 0)
956 					current_timeo = 0;
957 			}
958 			vm_wait = 0;
959 		}
960 		*timeo_p = current_timeo;
961 	}
962 do_rm_wq:
963 	remove_wait_queue(sk_sleep(sk), &wait);
964 	return err;
965 do_error:
966 	err = -EPIPE;
967 	goto do_rm_wq;
968 do_nonblock:
969 	err = -EAGAIN;
970 	goto do_rm_wq;
971 do_interrupted:
972 	err = sock_intr_errno(*timeo_p);
973 	goto do_rm_wq;
974 }
975 
chtls_proccess_cmsg(struct sock * sk,struct msghdr * msg,unsigned char * record_type)976 static int chtls_proccess_cmsg(struct sock *sk, struct msghdr *msg,
977 			       unsigned char *record_type)
978 {
979 	struct cmsghdr *cmsg;
980 	int rc = -EINVAL;
981 
982 	for_each_cmsghdr(cmsg, msg) {
983 		if (!CMSG_OK(msg, cmsg))
984 			return -EINVAL;
985 		if (cmsg->cmsg_level != SOL_TLS)
986 			continue;
987 
988 		switch (cmsg->cmsg_type) {
989 		case TLS_SET_RECORD_TYPE:
990 			if (cmsg->cmsg_len < CMSG_LEN(sizeof(*record_type)))
991 				return -EINVAL;
992 
993 			if (msg->msg_flags & MSG_MORE)
994 				return -EINVAL;
995 
996 			*record_type = *(unsigned char *)CMSG_DATA(cmsg);
997 			rc = 0;
998 			break;
999 		default:
1000 			return -EINVAL;
1001 		}
1002 	}
1003 
1004 	return rc;
1005 }
1006 
/*
 * Queue the user data described by @msg on the connection's transmit queue
 * (csk->txq).
 *
 * The socket lock is taken for the duration of the call.  Data is copied
 * either into the linear tailroom of the tail skb or into page fragments
 * backed by the per-socket cached page (TCP_PAGE()/TCP_OFF()).  When TLS TX
 * is active (is_tls_tx()) and no record is in progress (tlshws.txleft == 0),
 * a new record of @size bytes is started, with the record type optionally
 * taken from a control message.
 *
 * Returns the number of bytes queued.  If nothing was queued before an error
 * occurred, the result of sk_stream_error() for that error is returned.
 */
int chtls_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
{
	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
	struct chtls_dev *cdev = csk->cdev;
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;
	int mss, flags, err;
	int recordsz = 0;
	int copied = 0;
	long timeo;

	lock_sock(sk);
	flags = msg->msg_flags;
	timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);

	/* Wait for the connection to come up unless it already is usable */
	if (!sk_in_state(sk, TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) {
		err = sk_stream_wait_connect(sk, &timeo);
		if (err)
			goto out_err;
	}

	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
	err = -EPIPE;
	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
		goto out_err;

	mss = csk->mss;
	csk_set_flag(csk, CSK_TX_MORE_DATA);

	while (msg_data_left(msg)) {
		int copy = 0;

		/* Room left in the tail skb, if there is one */
		skb = skb_peek_tail(&csk->txq);
		if (skb) {
			copy = mss - skb->len;
			skb->ip_summed = CHECKSUM_UNNECESSARY;
		}
		if (!csk_mem_free(cdev, sk))
			goto wait_for_sndbuf;

		/* No record in progress: start a new one covering @size */
		if (is_tls_tx(csk) && !csk->tlshws.txleft) {
			unsigned char record_type = TLS_RECORD_TYPE_DATA;

			if (unlikely(msg->msg_controllen)) {
				err = chtls_proccess_cmsg(sk, msg,
							  &record_type);
				if (err)
					goto out_err;

				/* Avoid appending tls handshake, alert to tls data */
				if (skb)
					tx_skb_finalize(skb);
			}

			recordsz = size;
			csk->tlshws.txleft = recordsz;
			csk->tlshws.type = record_type;
		}

		/* Need a fresh skb: none queued, tail is closed, or tail is full */
		if (!skb || (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) ||
		    copy <= 0) {
new_buf:
			/* Also entered from below when the tail skb has no free frag slot */
			if (skb) {
				tx_skb_finalize(skb);
				push_frames_if_head(sk);
			}

			if (is_tls_tx(csk)) {
				skb = get_record_skb(sk,
						     select_size(sk,
								 recordsz,
								 flags,
								 TX_TLSHDR_LEN),
								 false);
			} else {
				skb = get_tx_skb(sk,
						 select_size(sk, size, flags,
							     TX_HEADER_LEN));
			}
			if (unlikely(!skb))
				goto wait_for_memory;

			skb->ip_summed = CHECKSUM_UNNECESSARY;
			copy = mss;
		}
		if (copy > size)
			copy = size;

		if (skb_tailroom(skb) > 0) {
			/* Linear area still has room: copy straight into it */
			copy = min(copy, skb_tailroom(skb));
			if (is_tls_tx(csk))
				copy = min_t(int, copy, csk->tlshws.txleft);
			err = skb_add_data_nocache(sk, skb,
						   &msg->msg_iter, copy);
			if (err)
				goto do_fault;
		} else {
			/* Otherwise append to page fragments via the cached page */
			int i = skb_shinfo(skb)->nr_frags;
			struct page *page = TCP_PAGE(sk);
			int pg_size = PAGE_SIZE;
			int off = TCP_OFF(sk);
			bool merge;

			if (page)
				pg_size = page_size(page);
			/* Extend the last fragment when the cached page continues it */
			if (off < pg_size &&
			    skb_can_coalesce(skb, i, page, off)) {
				merge = true;
				goto copy;
			}
			merge = false;
			/* TLS skbs stop one fragment short of MAX_SKB_FRAGS */
			if (i == (is_tls_tx(csk) ? (MAX_SKB_FRAGS - 1) :
			    MAX_SKB_FRAGS))
				goto new_buf;

			/* Cached page is used up: drop our reference to it */
			if (page && off == pg_size) {
				put_page(page);
				TCP_PAGE(sk) = page = NULL;
				pg_size = PAGE_SIZE;
			}

			if (!page) {
				gfp_t gfp = sk->sk_allocation;
				int order = cdev->send_page_order;

				/* Try a compound page first, without retry or warning */
				if (order) {
					page = alloc_pages(gfp | __GFP_COMP |
							   __GFP_NOWARN |
							   __GFP_NORETRY,
							   order);
					if (page)
						pg_size <<= order;
				}
				/* Fall back to a single page */
				if (!page) {
					page = alloc_page(gfp);
					pg_size = PAGE_SIZE;
				}
				if (!page)
					goto wait_for_memory;
				off = 0;
			}
copy:
			if (copy > pg_size - off)
				copy = pg_size - off;
			if (is_tls_tx(csk))
				copy = min_t(int, copy, csk->tlshws.txleft);

			err = chtls_skb_copy_to_page_nocache(sk, &msg->msg_iter,
							     skb, page,
							     off, copy);
			if (unlikely(err)) {
				/* Keep a freshly allocated page cached for later use */
				if (!TCP_PAGE(sk)) {
					TCP_PAGE(sk) = page;
					TCP_OFF(sk) = 0;
				}
				goto do_fault;
			}
			/* Update the skb. */
			if (merge) {
				skb_frag_size_add(
						&skb_shinfo(skb)->frags[i - 1],
						copy);
			} else {
				skb_fill_page_desc(skb, i, page, off, copy);
				if (off + copy < pg_size) {
					/* space left keep page */
					get_page(page);
					TCP_PAGE(sk) = page;
				} else {
					TCP_PAGE(sk) = NULL;
				}
			}
			TCP_OFF(sk) = off + copy;
		}
		/* A full-sized skb accepts no more data */
		if (unlikely(skb->len == mss))
			tx_skb_finalize(skb);
		tp->write_seq += copy;
		copied += copy;
		size -= copy;

		if (is_tls_tx(csk))
			csk->tlshws.txleft -= copy;

		if (corked(tp, flags) &&
		    (sk_stream_wspace(sk) < sk_stream_min_wspace(sk)))
			ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_NO_APPEND;

		if (size == 0)
			goto out;

		if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND)
			push_frames_if_head(sk);
		continue;
wait_for_sndbuf:
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory:
		err = csk_wait_memory(cdev, sk, &timeo);
		if (err)
			goto do_error;
	}
out:
	csk_reset_flag(csk, CSK_TX_MORE_DATA);
	if (copied)
		chtls_tcp_push(sk, flags);
done:
	release_sock(sk);
	return copied;
do_fault:
	/* Drop an skb that ended up with no data after a failed copy */
	if (!skb->len) {
		__skb_unlink(skb, &csk->txq);
		sk->sk_wmem_queued -= skb->truesize;
		__kfree_skb(skb);
	}
do_error:
	/* Partial progress is reported as success */
	if (copied)
		goto out;
out_err:
	if (csk_conn_inline(csk))
		csk_reset_flag(csk, CSK_TX_MORE_DATA);
	copied = sk_stream_error(sk, flags, err);
	goto done;
}
1229 
/*
 * Queue @size bytes of @page, starting at @offset, on the connection's
 * transmit queue (csk->txq) without copying: the page is attached to skbs
 * as a fragment and an extra page reference is taken for each new fragment.
 *
 * The socket lock is taken for the duration of the call.
 *
 * Returns the number of bytes queued.  If nothing was queued before an error
 * occurred, the result of sk_stream_error() for that error is returned.
 */
int chtls_sendpage(struct sock *sk, struct page *page,
		   int offset, size_t size, int flags)
{
	struct chtls_sock *csk;
	struct chtls_dev *cdev;
	int mss, err, copied;
	struct tcp_sock *tp;
	long timeo;

	tp = tcp_sk(sk);
	copied = 0;
	csk = rcu_dereference_sk_user_data(sk);
	cdev = csk->cdev;
	lock_sock(sk);
	timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);

	/*
	 * sk_stream_wait_connect() is called unconditionally here; its error
	 * is only honoured when the socket is not in an established state.
	 */
	err = sk_stream_wait_connect(sk, &timeo);
	if (!sk_in_state(sk, TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
	    err != 0)
		goto out_err;

	mss = csk->mss;
	csk_set_flag(csk, CSK_TX_MORE_DATA);

	while (size > 0) {
		struct sk_buff *skb = skb_peek_tail(&csk->txq);
		int copy, i;

		/* Need a fresh skb: none queued, tail is closed, or tail is full */
		if (!skb || (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) ||
		    (copy = mss - skb->len) <= 0) {
new_buf:
			/* Also entered from below when the tail skb has no free frag slot */
			if (!csk_mem_free(cdev, sk))
				goto wait_for_sndbuf;

			if (is_tls_tx(csk)) {
				skb = get_record_skb(sk,
						     select_size(sk, size,
								 flags,
								 TX_TLSHDR_LEN),
						     true);
			} else {
				skb = get_tx_skb(sk, 0);
			}
			if (!skb)
				goto wait_for_memory;
			copy = mss;
		}
		if (copy > size)
			copy = size;

		i = skb_shinfo(skb)->nr_frags;
		if (skb_can_coalesce(skb, i, page, offset)) {
			/* Contiguous with the last fragment: just grow it */
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
		} else if (i < MAX_SKB_FRAGS) {
			/* New fragment holds its own reference on the page */
			get_page(page);
			skb_fill_page_desc(skb, i, page, offset, copy);
		} else {
			/* No fragment slot left: close this skb and start another */
			tx_skb_finalize(skb);
			push_frames_if_head(sk);
			goto new_buf;
		}

		/* Account for the attached bytes by hand */
		skb->len += copy;
		if (skb->len == mss)
			tx_skb_finalize(skb);
		skb->data_len += copy;
		skb->truesize += copy;
		sk->sk_wmem_queued += copy;
		tp->write_seq += copy;
		copied += copy;
		offset += copy;
		size -= copy;

		if (corked(tp, flags) &&
		    (sk_stream_wspace(sk) < sk_stream_min_wspace(sk)))
			ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_NO_APPEND;

		if (!size)
			break;

		if (unlikely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND))
			push_frames_if_head(sk);
		continue;
wait_for_sndbuf:
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory:
		err = csk_wait_memory(cdev, sk, &timeo);
		if (err)
			goto do_error;
	}
out:
	csk_reset_flag(csk, CSK_TX_MORE_DATA);
	if (copied)
		chtls_tcp_push(sk, flags);
done:
	release_sock(sk);
	return copied;

do_error:
	/* Partial progress is reported as success */
	if (copied)
		goto out;

out_err:
	if (csk_conn_inline(csk))
		csk_reset_flag(csk, CSK_TX_MORE_DATA);
	copied = sk_stream_error(sk, flags, err);
	goto done;
}
1338 
chtls_select_window(struct sock * sk)1339 static void chtls_select_window(struct sock *sk)
1340 {
1341 	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
1342 	struct tcp_sock *tp = tcp_sk(sk);
1343 	unsigned int wnd = tp->rcv_wnd;
1344 
1345 	wnd = max_t(unsigned int, wnd, tcp_full_space(sk));
1346 	wnd = max_t(unsigned int, MIN_RCV_WND, wnd);
1347 
1348 	if (wnd > MAX_RCV_WND)
1349 		wnd = MAX_RCV_WND;
1350 
1351 /*
1352  * Check if we need to grow the receive window in response to an increase in
1353  * the socket's receive buffer size.  Some applications increase the buffer
1354  * size dynamically and rely on the window to grow accordingly.
1355  */
1356 
1357 	if (wnd > tp->rcv_wnd) {
1358 		tp->rcv_wup -= wnd - tp->rcv_wnd;
1359 		tp->rcv_wnd = wnd;
1360 		/* Mark the receive window as updated */
1361 		csk_reset_flag(csk, CSK_UPDATE_RCV_WND);
1362 	}
1363 }
1364 
1365 /*
1366  * Send RX credits through an RX_DATA_ACK CPL message.  We are permitted
1367  * to return without sending the message in case we cannot allocate
1368  * an sk_buff.  Returns the number of credits sent.
1369  */
send_rx_credits(struct chtls_sock * csk,u32 credits)1370 static u32 send_rx_credits(struct chtls_sock *csk, u32 credits)
1371 {
1372 	struct cpl_rx_data_ack *req;
1373 	struct sk_buff *skb;
1374 
1375 	skb = alloc_skb(sizeof(*req), GFP_ATOMIC);
1376 	if (!skb)
1377 		return 0;
1378 	__skb_put(skb, sizeof(*req));
1379 	req = (struct cpl_rx_data_ack *)skb->head;
1380 
1381 	set_wr_txq(skb, CPL_PRIORITY_ACK, csk->port_id);
1382 	INIT_TP_WR(req, csk->tid);
1383 	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK,
1384 						    csk->tid));
1385 	req->credit_dack = cpu_to_be32(RX_CREDITS_V(credits) |
1386 				       RX_FORCE_ACK_F);
1387 	cxgb4_ofld_send(csk->cdev->ports[csk->port_id], skb);
1388 	return credits;
1389 }
1390 
1391 #define CREDIT_RETURN_STATE (TCPF_ESTABLISHED | \
1392 			     TCPF_FIN_WAIT1 | \
1393 			     TCPF_FIN_WAIT2)
1394 
1395 /*
1396  * Called after some received data has been read.  It returns RX credits
1397  * to the HW for the amount of data processed.
1398  */
chtls_cleanup_rbuf(struct sock * sk,int copied)1399 static void chtls_cleanup_rbuf(struct sock *sk, int copied)
1400 {
1401 	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
1402 	struct tcp_sock *tp;
1403 	int must_send;
1404 	u32 credits;
1405 	u32 thres;
1406 
1407 	thres = 15 * 1024;
1408 
1409 	if (!sk_in_state(sk, CREDIT_RETURN_STATE))
1410 		return;
1411 
1412 	chtls_select_window(sk);
1413 	tp = tcp_sk(sk);
1414 	credits = tp->copied_seq - tp->rcv_wup;
1415 	if (unlikely(!credits))
1416 		return;
1417 
1418 /*
1419  * For coalescing to work effectively ensure the receive window has
1420  * at least 16KB left.
1421  */
1422 	must_send = credits + 16384 >= tp->rcv_wnd;
1423 
1424 	if (must_send || credits >= thres)
1425 		tp->rcv_wup += send_rx_credits(csk, credits);
1426 }
1427 
/*
 * Receive path used when TLS RX is active on the connection.
 *
 * Must be called with the socket lock held; the lock is released before
 * returning.  skbs flagged ULPCB_FLAG_TLS_HDR carry a TLS header: their
 * record type is passed to user space as a TLS_GET_RECORD_TYPE control
 * message and their payload is not copied.  Other skbs are copied to @msg
 * starting at the per-connection offset hws->copied_seq.
 *
 * Returns the number of bytes copied, or a negative errno if nothing was
 * copied before an error occurred.
 */
static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
			    int flags, int *addr_len)
{
	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
	struct chtls_hws *hws = &csk->tlshws;
	struct net_device *dev = csk->egress_dev;
	struct adapter *adap = netdev2adap(dev);
	struct tcp_sock *tp = tcp_sk(sk);
	unsigned long avail;
	int buffers_freed;
	int copied = 0;
	int target;
	long timeo;

	buffers_freed = 0;

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);

	if (unlikely(csk_flag(sk, CSK_UPDATE_RCV_WND)))
		chtls_cleanup_rbuf(sk, copied);

	do {
		struct sk_buff *skb;
		u32 offset = 0;

		/* Stop at the urgent mark once something has been copied */
		if (unlikely(tp->urg_data &&
			     tp->urg_seq == tp->copied_seq)) {
			if (copied)
				break;
			if (signal_pending(current)) {
				copied = timeo ? sock_intr_errno(timeo) :
					-EAGAIN;
				break;
			}
		}
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			goto found_ok_skb;

		/* Nothing to read: use the time to push pending TX data */
		if (csk->wr_credits &&
		    skb_queue_len(&csk->txq) &&
		    chtls_push_frames(csk, csk->wr_credits ==
				      csk->wr_max_credits))
			sk->sk_write_space(sk);

		if (copied >= target && !READ_ONCE(sk->sk_backlog.tail))
			break;

		if (copied) {
			if (sk->sk_err || sk->sk_state == TCP_CLOSE ||
			    (sk->sk_shutdown & RCV_SHUTDOWN) ||
			    signal_pending(current))
				break;

			if (!timeo)
				break;
		} else {
			if (sock_flag(sk, SOCK_DONE))
				break;
			if (sk->sk_err) {
				copied = sock_error(sk);
				break;
			}
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				break;
			if (sk->sk_state == TCP_CLOSE) {
				copied = -ENOTCONN;
				break;
			}
			if (!timeo) {
				copied = -EAGAIN;
				break;
			}
			if (signal_pending(current)) {
				copied = sock_intr_errno(timeo);
				break;
			}
		}
		/* Dropping and retaking the lock lets the backlog be processed */
		if (READ_ONCE(sk->sk_backlog.tail)) {
			release_sock(sk);
			lock_sock(sk);
			chtls_cleanup_rbuf(sk, copied);
			continue;
		}

		if (copied >= target)
			break;
		chtls_cleanup_rbuf(sk, copied);
		sk_wait_data(sk, &timeo, NULL);
		continue;
found_ok_skb:
		/* Zero-length skb: discard it and decide whether to go on */
		if (!skb->len) {
			skb_dst_set(skb, NULL);
			__skb_unlink(skb, &sk->sk_receive_queue);
			kfree_skb(skb);

			if (!copied && !timeo) {
				copied = -EAGAIN;
				break;
			}

			if (copied < target) {
				release_sock(sk);
				lock_sock(sk);
				continue;
			}
			break;
		}
		offset = hws->copied_seq;
		avail = skb->len - offset;
		if (len < avail)
			avail = len;

		if (unlikely(tp->urg_data)) {
			u32 urg_offset = tp->urg_seq - tp->copied_seq;

			if (urg_offset < avail) {
				if (urg_offset) {
					avail = urg_offset;
				} else if (!sock_flag(sk, SOCK_URGINLINE)) {
					/* First byte is urgent, skip */
					tp->copied_seq++;
					offset++;
					avail--;
					if (!avail)
						goto skip_copy;
				}
			}
		}
		/* Set record type if not already done. For a non-data record,
		 * do not proceed if record type could not be copied.
		 */
		if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) {
			struct tls_hdr *thdr = (struct tls_hdr *)skb->data;
			int cerr = 0;

			cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE,
					sizeof(thdr->type), &thdr->type);

			if (cerr && thdr->type != TLS_RECORD_TYPE_DATA) {
				copied = -EIO;
				break;
			}
			/*  don't send tls header, skip copy */
			goto skip_copy;
		}

		if (skb_copy_datagram_msg(skb, offset, msg, avail)) {
			/*
			 * The copy to user space failed: stop here so bytes
			 * that were not delivered are neither counted nor
			 * consumed from the record.
			 */
			if (!copied)
				copied = -EFAULT;
			break;
		}

		copied += avail;
		len -= avail;
		hws->copied_seq += avail;
skip_copy:
		if (tp->urg_data && after(tp->copied_seq, tp->urg_seq))
			tp->urg_data = 0;

		/* The whole skb has been consumed */
		if ((avail + offset) >= skb->len) {
			struct sk_buff *next_skb;

			if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) {
				tp->copied_seq += skb->len;
				hws->rcvpld = skb->hdr_len;
			} else {
				atomic_inc(&adap->chcr_stats.tls_pdu_rx);
				tp->copied_seq += hws->rcvpld;
			}
			chtls_free_skb(sk, skb);
			buffers_freed++;
			hws->copied_seq = 0;
			next_skb = skb_peek(&sk->sk_receive_queue);
			if (copied >= target && !next_skb)
				break;
			/*
			 * Stop before the next TLS header.  The queue may be
			 * empty here when copied < target, so next_skb must
			 * be checked before it is dereferenced.
			 */
			if (next_skb &&
			    (ULP_SKB_CB(next_skb)->flags & ULPCB_FLAG_TLS_HDR))
				break;
		}
	} while (len > 0);

	if (buffers_freed)
		chtls_cleanup_rbuf(sk, copied);
	release_sock(sk);
	return copied;
}
1614 
1615 /*
1616  * Peek at data in a socket's receive buffer.
1617  */
peekmsg(struct sock * sk,struct msghdr * msg,size_t len,int flags)1618 static int peekmsg(struct sock *sk, struct msghdr *msg,
1619 		   size_t len, int flags)
1620 {
1621 	struct tcp_sock *tp = tcp_sk(sk);
1622 	u32 peek_seq, offset;
1623 	struct sk_buff *skb;
1624 	int copied = 0;
1625 	size_t avail;          /* amount of available data in current skb */
1626 	long timeo;
1627 
1628 	lock_sock(sk);
1629 	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1630 	peek_seq = tp->copied_seq;
1631 
1632 	do {
1633 		if (unlikely(tp->urg_data && tp->urg_seq == peek_seq)) {
1634 			if (copied)
1635 				break;
1636 			if (signal_pending(current)) {
1637 				copied = timeo ? sock_intr_errno(timeo) :
1638 				-EAGAIN;
1639 				break;
1640 			}
1641 		}
1642 
1643 		skb_queue_walk(&sk->sk_receive_queue, skb) {
1644 			offset = peek_seq - ULP_SKB_CB(skb)->seq;
1645 			if (offset < skb->len)
1646 				goto found_ok_skb;
1647 		}
1648 
1649 		/* empty receive queue */
1650 		if (copied)
1651 			break;
1652 		if (sock_flag(sk, SOCK_DONE))
1653 			break;
1654 		if (sk->sk_err) {
1655 			copied = sock_error(sk);
1656 			break;
1657 		}
1658 		if (sk->sk_shutdown & RCV_SHUTDOWN)
1659 			break;
1660 		if (sk->sk_state == TCP_CLOSE) {
1661 			copied = -ENOTCONN;
1662 			break;
1663 		}
1664 		if (!timeo) {
1665 			copied = -EAGAIN;
1666 			break;
1667 		}
1668 		if (signal_pending(current)) {
1669 			copied = sock_intr_errno(timeo);
1670 			break;
1671 		}
1672 
1673 		if (READ_ONCE(sk->sk_backlog.tail)) {
1674 			/* Do not sleep, just process backlog. */
1675 			release_sock(sk);
1676 			lock_sock(sk);
1677 		} else {
1678 			sk_wait_data(sk, &timeo, NULL);
1679 		}
1680 
1681 		if (unlikely(peek_seq != tp->copied_seq)) {
1682 			if (net_ratelimit())
1683 				pr_info("TCP(%s:%d), race in MSG_PEEK.\n",
1684 					current->comm, current->pid);
1685 			peek_seq = tp->copied_seq;
1686 		}
1687 		continue;
1688 
1689 found_ok_skb:
1690 		avail = skb->len - offset;
1691 		if (len < avail)
1692 			avail = len;
1693 		/*
1694 		 * Do we have urgent data here?  We need to skip over the
1695 		 * urgent byte.
1696 		 */
1697 		if (unlikely(tp->urg_data)) {
1698 			u32 urg_offset = tp->urg_seq - peek_seq;
1699 
1700 			if (urg_offset < avail) {
1701 				/*
1702 				 * The amount of data we are preparing to copy
1703 				 * contains urgent data.
1704 				 */
1705 				if (!urg_offset) { /* First byte is urgent */
1706 					if (!sock_flag(sk, SOCK_URGINLINE)) {
1707 						peek_seq++;
1708 						offset++;
1709 						avail--;
1710 					}
1711 					if (!avail)
1712 						continue;
1713 				} else {
1714 					/* stop short of the urgent data */
1715 					avail = urg_offset;
1716 				}
1717 			}
1718 		}
1719 
1720 		/*
1721 		 * If MSG_TRUNC is specified the data is discarded.
1722 		 */
1723 		if (likely(!(flags & MSG_TRUNC)))
1724 			if (skb_copy_datagram_msg(skb, offset, msg, len)) {
1725 				if (!copied) {
1726 					copied = -EFAULT;
1727 					break;
1728 				}
1729 			}
1730 		peek_seq += avail;
1731 		copied += avail;
1732 		len -= avail;
1733 	} while (len > 0);
1734 
1735 	release_sock(sk);
1736 	return copied;
1737 }
1738 
/*
 * Receive entry point for an offloaded connection.
 *
 * MSG_OOB is handed to tcp_prot.recvmsg() and MSG_PEEK to peekmsg().  When
 * TLS RX is active (is_tls_rx()) the request is passed, with the socket
 * lock held, to chtls_pt_recvmsg(), which releases the lock itself.
 * Otherwise data is copied from sk->sk_receive_queue, advancing
 * tp->copied_seq; with MSG_TRUNC the data is consumed without being copied.
 *
 * Returns the number of bytes received, or a negative errno if nothing was
 * received before an error occurred.
 */
int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
		  int flags, int *addr_len)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct chtls_sock *csk;
	unsigned long avail;    /* amount of available data in current skb */
	int buffers_freed;
	int copied = 0;
	long timeo;
	int target;             /* Read at least this many bytes */

	buffers_freed = 0;

	if (unlikely(flags & MSG_OOB))
		return tcp_prot.recvmsg(sk, msg, len, flags, addr_len);

	if (unlikely(flags & MSG_PEEK))
		return peekmsg(sk, msg, len, flags);

	if (sk_can_busy_loop(sk) &&
	    skb_queue_empty_lockless(&sk->sk_receive_queue) &&
	    sk->sk_state == TCP_ESTABLISHED)
		sk_busy_loop(sk, flags & MSG_DONTWAIT);

	lock_sock(sk);
	csk = rcu_dereference_sk_user_data(sk);

	/* chtls_pt_recvmsg() releases the socket lock before returning */
	if (is_tls_rx(csk))
		return chtls_pt_recvmsg(sk, msg, len, flags, addr_len);

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);

	if (unlikely(csk_flag(sk, CSK_UPDATE_RCV_WND)))
		chtls_cleanup_rbuf(sk, copied);

	do {
		struct sk_buff *skb;
		u32 offset;

		/* Stop at the urgent mark once something has been copied */
		if (unlikely(tp->urg_data && tp->urg_seq == tp->copied_seq)) {
			if (copied)
				break;
			if (signal_pending(current)) {
				copied = timeo ? sock_intr_errno(timeo) :
					-EAGAIN;
				break;
			}
		}

		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			goto found_ok_skb;

		/* Nothing to read: use the time to push pending TX data */
		if (csk->wr_credits &&
		    skb_queue_len(&csk->txq) &&
		    chtls_push_frames(csk, csk->wr_credits ==
				      csk->wr_max_credits))
			sk->sk_write_space(sk);

		if (copied >= target && !READ_ONCE(sk->sk_backlog.tail))
			break;

		if (copied) {
			if (sk->sk_err || sk->sk_state == TCP_CLOSE ||
			    (sk->sk_shutdown & RCV_SHUTDOWN) ||
			    signal_pending(current))
				break;

			/*
			 * Timeout is zero or has run out: return what we
			 * have instead of calling sk_wait_data() again and
			 * again with nothing left to wait for.
			 */
			if (!timeo)
				break;
		} else {
			if (sock_flag(sk, SOCK_DONE))
				break;
			if (sk->sk_err) {
				copied = sock_error(sk);
				break;
			}
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				break;
			if (sk->sk_state == TCP_CLOSE) {
				copied = -ENOTCONN;
				break;
			}
			if (!timeo) {
				copied = -EAGAIN;
				break;
			}
			if (signal_pending(current)) {
				copied = sock_intr_errno(timeo);
				break;
			}
		}

		/* Dropping and retaking the lock lets the backlog be processed */
		if (READ_ONCE(sk->sk_backlog.tail)) {
			release_sock(sk);
			lock_sock(sk);
			chtls_cleanup_rbuf(sk, copied);
			continue;
		}

		if (copied >= target)
			break;
		chtls_cleanup_rbuf(sk, copied);
		sk_wait_data(sk, &timeo, NULL);
		continue;

found_ok_skb:
		/* Zero-length skb: discard it and decide whether to go on */
		if (!skb->len) {
			chtls_kfree_skb(sk, skb);
			if (!copied && !timeo) {
				copied = -EAGAIN;
				break;
			}

			if (copied < target)
				continue;

			break;
		}

		offset = tp->copied_seq - ULP_SKB_CB(skb)->seq;
		avail = skb->len - offset;
		if (len < avail)
			avail = len;

		if (unlikely(tp->urg_data)) {
			u32 urg_offset = tp->urg_seq - tp->copied_seq;

			if (urg_offset < avail) {
				if (urg_offset) {
					/* stop short of the urgent data */
					avail = urg_offset;
				} else if (!sock_flag(sk, SOCK_URGINLINE)) {
					/* First byte is urgent, skip */
					tp->copied_seq++;
					offset++;
					avail--;
					if (!avail)
						goto skip_copy;
				}
			}
		}

		if (likely(!(flags & MSG_TRUNC))) {
			if (skb_copy_datagram_msg(skb, offset,
						  msg, avail)) {
				/*
				 * The copy to user space failed: stop here
				 * so bytes that were not delivered are
				 * neither counted nor consumed.
				 */
				if (!copied)
					copied = -EFAULT;
				break;
			}
		}

		tp->copied_seq += avail;
		copied += avail;
		len -= avail;

skip_copy:
		if (tp->urg_data && after(tp->copied_seq, tp->urg_seq))
			tp->urg_data = 0;

		/* The whole skb has been consumed */
		if (avail + offset >= skb->len) {
			chtls_free_skb(sk, skb);
			buffers_freed++;

			if  (copied >= target &&
			     !skb_peek(&sk->sk_receive_queue))
				break;
		}
	} while (len > 0);

	if (buffers_freed)
		chtls_cleanup_rbuf(sk, copied);

	release_sock(sk);
	return copied;
}
1912