// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2021 Intel Corporation. All rights reserved. */
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/device.h>
#include <linux/delay.h>
#include <linux/pci.h>
#include <linux/pci-doe.h>
#include <cxlpci.h>
#include <cxlmem.h>
#include <cxl.h>
#include "core.h"

/**
 * DOC: cxl core pci
 *
 * Compute Express Link protocols are layered on top of PCIe. CXL core provides
 * a set of helpers for CXL interactions which occur via PCIe.
 */

static unsigned short media_ready_timeout = 60;
module_param(media_ready_timeout, ushort, 0644);
MODULE_PARM_DESC(media_ready_timeout, "seconds to wait for media ready");

struct cxl_walk_context {
	struct pci_bus *bus;
	struct cxl_port *port;
	int type;
	int error;
	int count;
};
static int match_add_dports(struct pci_dev *pdev, void *data)
{
	struct cxl_walk_context *ctx = data;
	struct cxl_port *port = ctx->port;
	int type = pci_pcie_type(pdev);
	struct cxl_register_map map;
	struct cxl_dport *dport;
	u32 lnkcap, port_num;
	int rc;

	if (pdev->bus != ctx->bus)
		return 0;
	if (!pci_is_pcie(pdev))
		return 0;
	if (type != ctx->type)
		return 0;
	if (pci_read_config_dword(pdev, pci_pcie_cap(pdev) + PCI_EXP_LNKCAP,
				  &lnkcap))
		return 0;

	rc = cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map);
	if (rc)
		dev_dbg(&port->dev, "failed to find component registers\n");

	port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap);
	dport = devm_cxl_add_dport(port, &pdev->dev, port_num,
				   cxl_regmap_to_base(pdev, &map));
	if (IS_ERR(dport)) {
		ctx->error = PTR_ERR(dport);
		return PTR_ERR(dport);
	}
	ctx->count++;

	dev_dbg(&port->dev, "add dport%d: %s\n", port_num, dev_name(&pdev->dev));

	return 0;
}

/**
 * devm_cxl_port_enumerate_dports - enumerate downstream ports of the upstream port
 * @port: cxl_port whose ->uport is the upstream of dports to be enumerated
 *
 * Returns a positive number of dports enumerated or a negative error
 * code.
 */
int devm_cxl_port_enumerate_dports(struct cxl_port *port)
{
	struct pci_bus *bus = cxl_port_to_pci_bus(port);
	struct cxl_walk_context ctx;
	int type;

	if (!bus)
		return -ENXIO;

	if (pci_is_root_bus(bus))
		type = PCI_EXP_TYPE_ROOT_PORT;
	else
		type = PCI_EXP_TYPE_DOWNSTREAM;

	ctx = (struct cxl_walk_context) {
		.port = port,
		.bus = bus,
		.type = type,
	};
	pci_walk_bus(bus, match_add_dports, &ctx);

	if (ctx.count == 0)
		return -ENODEV;
	if (ctx.error)
		return ctx.error;
	return ctx.count;
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_port_enumerate_dports, CXL);
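
/*
 * Example: a minimal sketch of a caller, assuming a hypothetical port
 * driver probe routine (names are illustrative, error handling elided):
 *
 *	static int example_port_probe(struct device *dev)
 *	{
 *		struct cxl_port *port = to_cxl_port(dev);
 *		int nr_dports;
 *
 *		nr_dports = devm_cxl_port_enumerate_dports(port);
 *		if (nr_dports < 0)
 *			return nr_dports;
 *		return 0;
 *	}
 */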

/*
 * Wait up to @media_ready_timeout for the device to report memory
 * active.
 */
int cxl_await_media_ready(struct cxl_dev_state *cxlds)
{
	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
	int d = cxlds->cxl_dvsec;
	bool active = false;
	u64 md_status;
	int rc, i;

	for (i = media_ready_timeout; i; i--) {
		u32 temp;

		rc = pci_read_config_dword(
			pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &temp);
		if (rc)
			return rc;

		active = FIELD_GET(CXL_DVSEC_MEM_ACTIVE, temp);
		if (active)
			break;
		msleep(1000);
	}

	if (!active) {
		dev_err(&pdev->dev,
			"timeout awaiting memory active after %d seconds\n",
			media_ready_timeout);
		return -ETIMEDOUT;
	}

	md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
	if (!CXLMDEV_READY(md_status))
		return -EIO;

	return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_await_media_ready, CXL);
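
/*
 * Example: a minimal sketch of a caller, assuming a hypothetical memdev
 * driver probe path where @cxlds is already set up (illustrative only):
 *
 *	rc = cxl_await_media_ready(cxlds);
 *	if (rc)
 *		return rc;
 */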

static int wait_for_valid(struct cxl_dev_state *cxlds)
{
	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
	int d = cxlds->cxl_dvsec, rc;
	u32 val;

	/*
	 * Memory_Info_Valid: When set, indicates that the CXL Range 1 Size high
	 * and Size Low registers are valid. Must be set within 1 second of
	 * deassertion of reset to CXL device. Likely it is already set by the
	 * time this runs, but otherwise give a 1.5 second timeout in case of
	 * clock skew.
	 */
	rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val);
	if (rc)
		return rc;

	if (val & CXL_DVSEC_MEM_INFO_VALID)
		return 0;

	msleep(1500);

	rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val);
	if (rc)
		return rc;

	if (val & CXL_DVSEC_MEM_INFO_VALID)
		return 0;

	return -ETIMEDOUT;
}

static int cxl_set_mem_enable(struct cxl_dev_state *cxlds, u16 val)
{
	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
	int d = cxlds->cxl_dvsec;
	u16 ctrl;
	int rc;

	rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl);
	if (rc < 0)
		return rc;

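	/* The enable bit already matches the requested state, nothing to do */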
	if ((ctrl & CXL_DVSEC_MEM_ENABLE) == val)
		return 1;
	ctrl &= ~CXL_DVSEC_MEM_ENABLE;
	ctrl |= val;

	rc = pci_write_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, ctrl);
	if (rc < 0)
		return rc;

	return 0;
}

static void clear_mem_enable(void *cxlds)
{
	cxl_set_mem_enable(cxlds, 0);
}

static int devm_cxl_enable_mem(struct device *host, struct cxl_dev_state *cxlds)
{
	int rc;

	rc = cxl_set_mem_enable(cxlds, CXL_DVSEC_MEM_ENABLE);
	if (rc < 0)
		return rc;
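
	/*
	 * Mem was already enabled by some other agent, e.g. platform
	 * firmware, so do not register a cleanup action to disable it.
	 */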
	if (rc > 0)
		return 0;
	return devm_add_action_or_reset(host, clear_mem_enable, cxlds);
}

static bool range_contains(struct range *r1, struct range *r2)
{
	return r1->start <= r2->start && r1->end >= r2->end;
}

/* require dvsec ranges to be covered by a locked platform window */
static int dvsec_range_allowed(struct device *dev, void *arg)
{
	struct range *dev_range = arg;
	struct cxl_decoder *cxld;

	if (!is_root_decoder(dev))
		return 0;

	cxld = to_cxl_decoder(dev);

	if (!(cxld->flags & CXL_DECODER_F_LOCK))
		return 0;
	if (!(cxld->flags & CXL_DECODER_F_RAM))
		return 0;

	return range_contains(&cxld->hpa_range, dev_range);
}

static void disable_hdm(void *_cxlhdm)
{
	u32 global_ctrl;
	struct cxl_hdm *cxlhdm = _cxlhdm;
	void __iomem *hdm = cxlhdm->regs.hdm_decoder;

	global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET);
	writel(global_ctrl & ~CXL_HDM_DECODER_ENABLE,
	       hdm + CXL_HDM_DECODER_CTRL_OFFSET);
}

static int devm_cxl_enable_hdm(struct device *host, struct cxl_hdm *cxlhdm)
{
	void __iomem *hdm = cxlhdm->regs.hdm_decoder;
	u32 global_ctrl;

	global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET);
	writel(global_ctrl | CXL_HDM_DECODER_ENABLE,
	       hdm + CXL_HDM_DECODER_CTRL_OFFSET);

	return devm_add_action_or_reset(host, disable_hdm, cxlhdm);
}

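/*
 * Returns true if HDM decode is operational for this endpoint: either
 * the HDM Decoder Capability was already enabled (e.g. by platform
 * firmware), or this routine enabled it. Returns false on error, or
 * when the device remains in legacy DVSEC range operation.
 */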
static bool __cxl_hdm_decode_init(struct cxl_dev_state *cxlds,
				  struct cxl_hdm *cxlhdm,
				  struct cxl_endpoint_dvsec_info *info)
{
	void __iomem *hdm = cxlhdm->regs.hdm_decoder;
	struct cxl_port *port = cxlhdm->port;
	struct device *dev = cxlds->dev;
	struct cxl_port *root;
	int i, rc, allowed;
	u32 global_ctrl;

	global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET);

	/*
	 * If the HDM Decoder Capability is already enabled then assume
	 * that some other agent like platform firmware set it up.
	 */
	if (global_ctrl & CXL_HDM_DECODER_ENABLE) {
		rc = devm_cxl_enable_mem(&port->dev, cxlds);
		if (rc)
			return false;
		return true;
	}

	root = to_cxl_port(port->dev.parent);
	while (!is_cxl_root(root) && is_cxl_port(root->dev.parent))
		root = to_cxl_port(root->dev.parent);
	if (!is_cxl_root(root)) {
		dev_err(dev, "Failed to acquire root port for HDM enable\n");
		return false;
	}

	for (i = 0, allowed = 0; info->mem_enabled && i < info->ranges; i++) {
		struct device *cxld_dev;

		cxld_dev = device_find_child(&root->dev, &info->dvsec_range[i],
					     dvsec_range_allowed);
		if (!cxld_dev) {
			dev_dbg(dev, "DVSEC Range%d denied by platform\n", i);
			continue;
		}
		dev_dbg(dev, "DVSEC Range%d allowed by platform\n", i);
		put_device(cxld_dev);
		allowed++;
	}

	if (!allowed) {
		cxl_set_mem_enable(cxlds, 0);
		info->mem_enabled = 0;
	}

	/*
	 * Per CXL 2.0 Sections 8.1.3.8.3 and 8.1.3.8.4 (DVSEC CXL Range 1
	 * Base [High,Low]), the range register values are ignored by the
	 * device when HDM operation is enabled, but the spec also recommends
	 * matching DVSEC Ranges 1,2 to HDM Decoder Ranges 0,1. So, non-zero
	 * info->ranges are expected even though Linux does not require or
	 * maintain that match. If at least one DVSEC range is enabled and
	 * allowed, skip HDM Decoder Capability Enable.
	 */
	if (info->mem_enabled)
		return false;

	rc = devm_cxl_enable_hdm(&port->dev, cxlhdm);
	if (rc)
		return false;

	rc = devm_cxl_enable_mem(&port->dev, cxlds);
	if (rc)
		return false;

	return true;
}

/**
 * cxl_hdm_decode_init() - Setup HDM decoding for the endpoint
 * @cxlds: Device state
 * @cxlhdm: Mapped HDM decoder Capability
 *
 * Try to enable the endpoint's HDM Decoder Capability
 */
int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm)
{
	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
	struct cxl_endpoint_dvsec_info info = { 0 };
	int hdm_count, rc, i, ranges = 0;
	struct device *dev = &pdev->dev;
	int d = cxlds->cxl_dvsec;
	u16 cap, ctrl;

	if (!d) {
		dev_dbg(dev, "No DVSEC Capability\n");
		return -ENXIO;
	}

	rc = pci_read_config_word(pdev, d + CXL_DVSEC_CAP_OFFSET, &cap);
	if (rc)
		return rc;

	rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl);
	if (rc)
		return rc;

	if (!(cap & CXL_DVSEC_MEM_CAPABLE)) {
		dev_dbg(dev, "Not MEM Capable\n");
		return -ENXIO;
	}

	/*
	 * It is not allowed by spec for MEM.capable to be set and have 0
	 * legacy HDM decoders (values > 2 are also undefined as of CXL 2.0).
	 * As this driver is for a spec-defined class code which must be
	 * CXL.mem capable, there is no point in continuing to enable CXL.mem.
	 */
	hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap);
	if (!hdm_count || hdm_count > 2)
		return -EINVAL;

	rc = wait_for_valid(cxlds);
	if (rc) {
		dev_dbg(dev, "Failure awaiting MEM_INFO_VALID (%d)\n", rc);
		return rc;
	}

	/*
	 * The current DVSEC values are moot if the memory capability is
	 * disabled, and they will remain moot after the HDM Decoder
	 * capability is enabled.
	 */
	info.mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl);
	if (!info.mem_enabled)
		goto hdm_init;

	for (i = 0; i < hdm_count; i++) {
		u64 base, size;
		u32 temp;

		rc = pci_read_config_dword(
			pdev, d + CXL_DVSEC_RANGE_SIZE_HIGH(i), &temp);
		if (rc)
			return rc;

		size = (u64)temp << 32;

		rc = pci_read_config_dword(
			pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(i), &temp);
		if (rc)
			return rc;

		size |= temp & CXL_DVSEC_MEM_SIZE_LOW_MASK;

		rc = pci_read_config_dword(
			pdev, d + CXL_DVSEC_RANGE_BASE_HIGH(i), &temp);
		if (rc)
			return rc;

		base = (u64)temp << 32;

		rc = pci_read_config_dword(
			pdev, d + CXL_DVSEC_RANGE_BASE_LOW(i), &temp);
		if (rc)
			return rc;

		base |= temp & CXL_DVSEC_MEM_BASE_LOW_MASK;

		info.dvsec_range[i] = (struct range) {
			.start = base,
			.end = base + size - 1
		};

		if (size)
			ranges++;
	}

	info.ranges = ranges;

	/*
	 * If DVSEC ranges are being used instead of HDM decoder registers
	 * there is no use in trying to manage those.
	 */
hdm_init:
	if (!__cxl_hdm_decode_init(cxlds, cxlhdm, &info)) {
		dev_err(dev,
			"Legacy range registers configuration prevents HDM operation.\n");
		return -EBUSY;
	}

	return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_hdm_decode_init, CXL);
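
/*
 * Example: a minimal sketch of the expected call order in an endpoint
 * port driver, assuming devm_cxl_setup_hdm() has already mapped the HDM
 * decoder registers (illustrative only):
 *
 *	cxlhdm = devm_cxl_setup_hdm(port);
 *	if (IS_ERR(cxlhdm))
 *		return PTR_ERR(cxlhdm);
 *
 *	rc = cxl_hdm_decode_init(cxlds, cxlhdm);
 *	if (rc)
 *		return rc;
 */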

#define CXL_DOE_TABLE_ACCESS_REQ_CODE		0x000000ff
#define   CXL_DOE_TABLE_ACCESS_REQ_CODE_READ	0
#define CXL_DOE_TABLE_ACCESS_TABLE_TYPE		0x0000ff00
#define   CXL_DOE_TABLE_ACCESS_TABLE_TYPE_CDATA	0
#define CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE	0xffff0000
#define CXL_DOE_TABLE_ACCESS_LAST_ENTRY		0xffff
#define CXL_DOE_PROTOCOL_TABLE_ACCESS 2

static struct pci_doe_mb *find_cdat_doe(struct device *uport)
{
	struct cxl_memdev *cxlmd;
	struct cxl_dev_state *cxlds;
	unsigned long index;
	void *entry;

	cxlmd = to_cxl_memdev(uport);
	cxlds = cxlmd->cxlds;

	xa_for_each(&cxlds->doe_mbs, index, entry) {
		struct pci_doe_mb *cur = entry;

		if (pci_doe_supports_prot(cur, PCI_DVSEC_VENDOR_ID_CXL,
					  CXL_DOE_PROTOCOL_TABLE_ACCESS))
			return cur;
	}

	return NULL;
}

#define CDAT_DOE_REQ(entry_handle)					\
	(FIELD_PREP(CXL_DOE_TABLE_ACCESS_REQ_CODE,			\
		    CXL_DOE_TABLE_ACCESS_REQ_CODE_READ) |		\
	 FIELD_PREP(CXL_DOE_TABLE_ACCESS_TABLE_TYPE,			\
		    CXL_DOE_TABLE_ACCESS_TABLE_TYPE_CDATA) |		\
	 FIELD_PREP(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE, (entry_handle)))
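
/*
 * For example, CDAT_DOE_REQ(3) encodes a read of CDAT entry handle 3:
 * request code 0 (read) in bits [7:0], table type 0 (CDAT) in bits
 * [15:8], and the entry handle in bits [31:16], i.e. 0x00030000.
 */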

static void cxl_doe_task_complete(struct pci_doe_task *task)
{
	complete(task->private);
}

struct cdat_doe_task {
	u32 request_pl;
	u32 response_pl[32];
	struct completion c;
	struct pci_doe_task task;
};

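/*
 * Declare an on-stack CDAT DOE task; cxl_doe_task_complete() signals
 * the embedded completion once the mailbox finishes the exchange.
 */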
#define DECLARE_CDAT_DOE_TASK(req, cdt)					\
struct cdat_doe_task cdt = {						\
	.c = COMPLETION_INITIALIZER_ONSTACK(cdt.c),			\
	.request_pl = req,						\
	.task = {							\
		.prot.vid = PCI_DVSEC_VENDOR_ID_CXL,			\
		.prot.type = CXL_DOE_PROTOCOL_TABLE_ACCESS,		\
		.request_pl = &cdt.request_pl,				\
		.request_pl_sz = sizeof(cdt.request_pl),		\
		.response_pl = cdt.response_pl,				\
		.response_pl_sz = sizeof(cdt.response_pl),		\
		.complete = cxl_doe_task_complete,			\
		.private = &cdt.c,					\
	}								\
}

static int cxl_cdat_get_length(struct device *dev,
			       struct pci_doe_mb *cdat_doe,
			       size_t *length)
{
	DECLARE_CDAT_DOE_TASK(CDAT_DOE_REQ(0), t);
	int rc;

	rc = pci_doe_submit_task(cdat_doe, &t.task);
	if (rc < 0) {
		dev_err(dev, "DOE submit failed: %d", rc);
		return rc;
	}
	wait_for_completion(&t.c);
	/* DW0 is the table access header, DW1 carries the CDAT length */
	if (t.task.rv < 2 * sizeof(u32))
		return -EIO;

	*length = t.response_pl[1];
	dev_dbg(dev, "CDAT length %zu\n", *length);

	return 0;
}

static int cxl_cdat_read_table(struct device *dev,
			       struct pci_doe_mb *cdat_doe,
			       struct cxl_cdat *cdat)
{
	size_t length = cdat->length;
	u32 *data = cdat->table;
	int entry_handle = 0;

	do {
		DECLARE_CDAT_DOE_TASK(CDAT_DOE_REQ(entry_handle), t);
		size_t entry_dw;
		u32 *entry;
		int rc;

		rc = pci_doe_submit_task(cdat_doe, &t.task);
		if (rc < 0) {
			dev_err(dev, "DOE submit failed: %d", rc);
			return rc;
		}
		wait_for_completion(&t.c);
		/* 1 DW header + 1 DW data min */
		if (t.task.rv < (2 * sizeof(u32)))
			return -EIO;

		/* Get the CXL table access header entry handle */
		entry_handle = FIELD_GET(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE,
					 t.response_pl[0]);
		entry = t.response_pl + 1;
		entry_dw = t.task.rv / sizeof(u32);
		/* Skip Header */
		entry_dw -= 1;
		entry_dw = min(length / sizeof(u32), entry_dw);
		/* Prevent length < 1 DW from causing a buffer overflow */
		if (entry_dw) {
			memcpy(data, entry, entry_dw * sizeof(u32));
			length -= entry_dw * sizeof(u32);
			data += entry_dw;
		}
	} while (entry_handle != CXL_DOE_TABLE_ACCESS_LAST_ENTRY);

	return 0;
}

/**
 * read_cdat_data - Read the CDAT data on this port
 * @port: Port to read data from
 *
 * This call will sleep waiting for responses from the DOE mailbox.
 */
void read_cdat_data(struct cxl_port *port)
{
	struct pci_doe_mb *cdat_doe;
	struct device *dev = &port->dev;
	struct device *uport = port->uport;
	size_t cdat_length;
	int rc;

	cdat_doe = find_cdat_doe(uport);
	if (!cdat_doe) {
		dev_dbg(dev, "No CDAT mailbox\n");
		return;
	}

	port->cdat_available = true;

	if (cxl_cdat_get_length(dev, cdat_doe, &cdat_length)) {
		dev_dbg(dev, "No CDAT length\n");
		return;
	}

	port->cdat.table = devm_kzalloc(dev, cdat_length, GFP_KERNEL);
	if (!port->cdat.table)
		return;

	port->cdat.length = cdat_length;
	rc = cxl_cdat_read_table(dev, cdat_doe, &port->cdat);
	if (rc) {
		/* Don't leave table data allocated on error */
		devm_kfree(dev, port->cdat.table);
		port->cdat.table = NULL;
		port->cdat.length = 0;
		dev_err(dev, "CDAT data read error\n");
	}
}
EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL);
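
/*
 * Example: a minimal sketch of a consumer, assuming a hypothetical
 * endpoint registration path where parse_cdat() is an illustrative,
 * made-up helper (illustrative only):
 *
 *	read_cdat_data(port);
 *	if (port->cdat.table)
 *		parse_cdat(port->cdat.table, port->cdat.length);
 */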