1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright 2020-21 IBM Corp.
4 */
5
6 #define pr_fmt(fmt) "vas: " fmt
7
8 #include <linux/module.h>
9 #include <linux/kernel.h>
10 #include <linux/export.h>
11 #include <linux/types.h>
12 #include <linux/delay.h>
13 #include <linux/slab.h>
14 #include <linux/interrupt.h>
15 #include <linux/irqdomain.h>
16 #include <asm/machdep.h>
17 #include <asm/hvcall.h>
18 #include <asm/plpar_wrappers.h>
19 #include <asm/firmware.h>
20 #include <asm/vas.h>
21 #include "vas.h"
22
23 #define VAS_INVALID_WIN_ADDRESS 0xFFFFFFFFFFFFFFFFul
24 #define VAS_DEFAULT_DOMAIN_ID 0xFFFFFFFFFFFFFFFFul
25 /* The hypervisor allows one credit per window right now */
26 #define DEF_WIN_CREDS 1
27
28 static struct vas_all_caps caps_all;
29 static bool copypaste_feat;
30 static struct hv_vas_cop_feat_caps hv_cop_caps;
31
32 static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE];
33 static DEFINE_MUTEX(vas_pseries_mutex);
34 static bool migration_in_progress;
35
hcall_return_busy_check(long rc)36 static long hcall_return_busy_check(long rc)
37 {
38 /* Check if we are stalled for some time */
39 if (H_IS_LONG_BUSY(rc)) {
40 msleep(get_longbusy_msecs(rc));
41 rc = H_BUSY;
42 } else if (rc == H_BUSY) {
43 cond_resched();
44 }
45
46 return rc;
47 }
48
49 /*
50 * Allocate VAS window hcall
51 */
h_allocate_vas_window(struct pseries_vas_window * win,u64 * domain,u8 wintype,u16 credits)52 static int h_allocate_vas_window(struct pseries_vas_window *win, u64 *domain,
53 u8 wintype, u16 credits)
54 {
55 long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
56 long rc;
57
58 do {
59 rc = plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, wintype,
60 credits, domain[0], domain[1], domain[2],
61 domain[3], domain[4], domain[5]);
62
63 rc = hcall_return_busy_check(rc);
64 } while (rc == H_BUSY);
65
66 if (rc == H_SUCCESS) {
67 if (win->win_addr == VAS_INVALID_WIN_ADDRESS) {
68 pr_err("H_ALLOCATE_VAS_WINDOW: COPY/PASTE is not supported\n");
69 return -ENOTSUPP;
70 }
71 win->vas_win.winid = retbuf[0];
72 win->win_addr = retbuf[1];
73 win->complete_irq = retbuf[2];
74 win->fault_irq = retbuf[3];
75 return 0;
76 }
77
78 pr_err("H_ALLOCATE_VAS_WINDOW error: %ld, wintype: %u, credits: %u\n",
79 rc, wintype, credits);
80
81 return -EIO;
82 }
83
84 /*
85 * Deallocate VAS window hcall.
86 */
h_deallocate_vas_window(u64 winid)87 static int h_deallocate_vas_window(u64 winid)
88 {
89 long rc;
90
91 do {
92 rc = plpar_hcall_norets(H_DEALLOCATE_VAS_WINDOW, winid);
93
94 rc = hcall_return_busy_check(rc);
95 } while (rc == H_BUSY);
96
97 if (rc == H_SUCCESS)
98 return 0;
99
100 pr_err("H_DEALLOCATE_VAS_WINDOW error: %ld, winid: %llu\n",
101 rc, winid);
102 return -EIO;
103 }
104
105 /*
106 * Modify VAS window.
107 * After the window is opened with allocate window hcall, configure it
108 * with flags and LPAR PID before using.
109 */
h_modify_vas_window(struct pseries_vas_window * win)110 static int h_modify_vas_window(struct pseries_vas_window *win)
111 {
112 long rc;
113
114 /*
115 * AMR value is not supported in Linux VAS implementation.
116 * The hypervisor ignores it if 0 is passed.
117 */
118 do {
119 rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW,
120 win->vas_win.winid, win->pid, 0,
121 VAS_MOD_WIN_FLAGS, 0);
122
123 rc = hcall_return_busy_check(rc);
124 } while (rc == H_BUSY);
125
126 if (rc == H_SUCCESS)
127 return 0;
128
129 pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u pid %u\n",
130 rc, win->vas_win.winid, win->pid);
131 return -EIO;
132 }
133
134 /*
135 * This hcall is used to determine the capabilities from the hypervisor.
136 * @hcall: H_QUERY_VAS_CAPABILITIES or H_QUERY_NX_CAPABILITIES
137 * @query_type: If 0 is passed, the hypervisor returns the overall
138 * capabilities which provides all feature(s) that are
139 * available. Then query the hypervisor to get the
140 * corresponding capabilities for the specific feature.
141 * Example: H_QUERY_VAS_CAPABILITIES provides VAS GZIP QoS
142 * and VAS GZIP Default capabilities.
143 * H_QUERY_NX_CAPABILITIES provides NX GZIP
144 * capabilities.
145 * @result: Return buffer to save capabilities.
146 */
h_query_vas_capabilities(const u64 hcall,u8 query_type,u64 result)147 int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result)
148 {
149 long rc;
150
151 rc = plpar_hcall_norets(hcall, query_type, result);
152
153 if (rc == H_SUCCESS)
154 return 0;
155
156 /* H_FUNCTION means HV does not support VAS so don't print an error */
157 if (rc != H_FUNCTION) {
158 pr_err("%s error %ld, query_type %u, result buffer 0x%llx\n",
159 (hcall == H_QUERY_VAS_CAPABILITIES) ?
160 "H_QUERY_VAS_CAPABILITIES" :
161 "H_QUERY_NX_CAPABILITIES",
162 rc, query_type, result);
163 }
164
165 return -EIO;
166 }
167 EXPORT_SYMBOL_GPL(h_query_vas_capabilities);
168
169 /*
170 * hcall to get fault CRB from the hypervisor.
171 */
h_get_nx_fault(u32 winid,u64 buffer)172 static int h_get_nx_fault(u32 winid, u64 buffer)
173 {
174 long rc;
175
176 rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer);
177
178 if (rc == H_SUCCESS)
179 return 0;
180
181 pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n",
182 rc, winid, buffer);
183 return -EIO;
184
185 }
186
187 /*
188 * Handle the fault interrupt.
189 * When the fault interrupt is received for each window, query the
190 * hypervisor to get the fault CRB on the specific fault. Then
191 * process the CRB by updating CSB or send signal if the user space
192 * CSB is invalid.
193 * Note: The hypervisor forwards an interrupt for each fault request.
194 * So one fault CRB to process for each H_GET_NX_FAULT hcall.
195 */
pseries_vas_fault_thread_fn(int irq,void * data)196 static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
197 {
198 struct pseries_vas_window *txwin = data;
199 struct coprocessor_request_block crb;
200 struct vas_user_win_ref *tsk_ref;
201 int rc;
202
203 while (atomic_read(&txwin->pending_faults)) {
204 rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb));
205 if (!rc) {
206 tsk_ref = &txwin->vas_win.task_ref;
207 vas_dump_crb(&crb);
208 vas_update_csb(&crb, tsk_ref);
209 }
210 atomic_dec(&txwin->pending_faults);
211 }
212
213 return IRQ_HANDLED;
214 }
215
216 /*
217 * irq_default_primary_handler() can be used only with IRQF_ONESHOT
218 * which disables IRQ before executing the thread handler and enables
219 * it after. But this disabling interrupt sets the VAS IRQ OFF
220 * state in the hypervisor. If the NX generates fault interrupt
221 * during this window, the hypervisor will not deliver this
222 * interrupt to the LPAR. So use VAS specific IRQ handler instead
223 * of calling the default primary handler.
224 */
pseries_vas_irq_handler(int irq,void * data)225 static irqreturn_t pseries_vas_irq_handler(int irq, void *data)
226 {
227 struct pseries_vas_window *txwin = data;
228
229 /*
230 * The thread hanlder will process this interrupt if it is
231 * already running.
232 */
233 atomic_inc(&txwin->pending_faults);
234
235 return IRQ_WAKE_THREAD;
236 }
237
238 /*
239 * Allocate window and setup IRQ mapping.
240 */
allocate_setup_window(struct pseries_vas_window * txwin,u64 * domain,u8 wintype)241 static int allocate_setup_window(struct pseries_vas_window *txwin,
242 u64 *domain, u8 wintype)
243 {
244 int rc;
245
246 rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS);
247 if (rc)
248 return rc;
249 /*
250 * On PowerVM, the hypervisor setup and forwards the fault
251 * interrupt per window. So the IRQ setup and fault handling
252 * will be done for each open window separately.
253 */
254 txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq);
255 if (!txwin->fault_virq) {
256 pr_err("Failed irq mapping %d\n", txwin->fault_irq);
257 rc = -EINVAL;
258 goto out_win;
259 }
260
261 txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d",
262 txwin->vas_win.winid);
263 if (!txwin->name) {
264 rc = -ENOMEM;
265 goto out_irq;
266 }
267
268 rc = request_threaded_irq(txwin->fault_virq,
269 pseries_vas_irq_handler,
270 pseries_vas_fault_thread_fn, 0,
271 txwin->name, txwin);
272 if (rc) {
273 pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n",
274 txwin->vas_win.winid, txwin->fault_virq, rc);
275 goto out_free;
276 }
277
278 txwin->vas_win.wcreds_max = DEF_WIN_CREDS;
279
280 return 0;
281 out_free:
282 kfree(txwin->name);
283 out_irq:
284 irq_dispose_mapping(txwin->fault_virq);
285 out_win:
286 h_deallocate_vas_window(txwin->vas_win.winid);
287 return rc;
288 }
289
free_irq_setup(struct pseries_vas_window * txwin)290 static inline void free_irq_setup(struct pseries_vas_window *txwin)
291 {
292 free_irq(txwin->fault_virq, txwin);
293 kfree(txwin->name);
294 irq_dispose_mapping(txwin->fault_virq);
295 }
296
/*
 * Open and configure a VAS send window for the calling process.
 *
 * @vas_id:   -1 to request a window on the VAS instance where the process
 *	      is executing; otherwise the hypervisor is left to choose
 * @flags:    VAS_TX_WIN_FLAG_QOS_CREDIT selects QoS credits, otherwise
 *	      default credits are used
 * @cop_type: coprocessor type (not used by this function)
 *
 * Returns the vas_window embedded in the newly allocated pseries window,
 * or an ERR_PTR() carrying a negative errno on failure.
 */
static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
					      enum vas_cop_type cop_type)
{
	long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
	struct vas_cop_feat_caps *cop_feat_caps;
	struct vas_caps *caps;
	struct pseries_vas_window *txwin;
	int rc;

	txwin = kzalloc(sizeof(*txwin), GFP_KERNEL);
	if (!txwin)
		return ERR_PTR(-ENOMEM);

	/*
	 * A VAS window can have many credits which means that many
	 * requests can be issued simultaneously. But the hypervisor
	 * restricts one credit per window.
	 * The hypervisor introduces 2 different types of credits:
	 * Default credit type (Uses normal priority FIFO):
	 *	A limited number of credits are assigned to partitions
	 *	based on processor entitlement. But these credits may be
	 *	over-committed on a system depends on whether the CPUs
	 *	are in shared or dedicated modes - that is, more requests
	 *	may be issued across the system than NX can service at
	 *	once which can result in paste command failure (RMA_busy).
	 *	Then the process has to resend requests or fall-back to
	 *	SW compression.
	 * Quality of Service (QoS) credit type (Uses high priority FIFO):
	 *	To avoid NX HW contention, the system admins can assign
	 *	QoS credits for each LPAR so that this partition is
	 *	guaranteed access to NX resources. These credits are
	 *	assigned to partitions via the HMC.
	 *	Refer PAPR for more information.
	 *
	 * Allocate window with QoS credits if user requested. Otherwise
	 * default credits are used.
	 */
	if (flags & VAS_TX_WIN_FLAG_QOS_CREDIT)
		caps = &vascaps[VAS_GZIP_QOS_FEAT_TYPE];
	else
		caps = &vascaps[VAS_GZIP_DEF_FEAT_TYPE];

	cop_feat_caps = &caps->caps;

	/*
	 * Reserve a credit up front; every failure path below gives it
	 * back at the "out" label.
	 */
	if (atomic_inc_return(&cop_feat_caps->nr_used_credits) >
			atomic_read(&cop_feat_caps->nr_total_credits)) {
		pr_err("Credits are not available to allocate window\n");
		rc = -EINVAL;
		goto out;
	}

	if (vas_id == -1) {
		/*
		 * The user space is requesting to allocate a window on
		 * a VAS instance where the process is executing.
		 * On PowerVM, domain values are passed to the hypervisor
		 * to select VAS instance. Useful if the process is
		 * affinity to NUMA node.
		 * The hypervisor selects VAS instance if
		 * VAS_DEFAULT_DOMAIN_ID (-1) is passed for domain values.
		 * The h_allocate_vas_window hcall is defined to take a
		 * domain values as specified by h_home_node_associativity,
		 * So no unpacking needs to be done.
		 */
		rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain,
				  VPHN_FLAG_VCPU, hard_smp_processor_id());
		if (rc != H_SUCCESS) {
			pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc);
			/*
			 * rc holds a hypervisor status, not an errno.
			 * Passing it to ERR_PTR() would hand the caller
			 * a bogus error, or for a positive status a
			 * pointer that IS_ERR() does not recognise.
			 * Report -EIO like the other hcall wrappers.
			 */
			rc = -EIO;
			goto out;
		}
	}

	txwin->pid = mfspr(SPRN_PID);

	/*
	 * Allocate / Deallocate window hcalls and setup / free IRQs
	 * have to be protected with mutex.
	 * Open VAS window: Allocate window hcall and setup IRQ
	 * Close VAS window: Deallocate window hcall and free IRQ
	 *	The hypervisor waits until all NX requests are
	 *	completed before closing the window. So expects OS
	 *	to handle NX faults, means IRQ can be freed only
	 *	after the deallocate window hcall is returned.
	 * So once the window is closed with deallocate hcall before
	 * the IRQ is freed, it can be assigned to new allocate
	 * hcall with the same fault IRQ by the hypervisor. It can
	 * result in setup IRQ fail for the new window since the
	 * same fault IRQ is not freed by the OS before.
	 */
	mutex_lock(&vas_pseries_mutex);
	if (migration_in_progress)
		rc = -EBUSY;
	else
		rc = allocate_setup_window(txwin, (u64 *)&domain[0],
					   cop_feat_caps->win_type);
	mutex_unlock(&vas_pseries_mutex);
	if (rc)
		goto out;

	/*
	 * Modify window and it is ready to use.
	 */
	rc = h_modify_vas_window(txwin);
	if (!rc)
		rc = get_vas_user_win_ref(&txwin->vas_win.task_ref);
	if (rc)
		goto out_free;

	txwin->win_type = cop_feat_caps->win_type;
	mutex_lock(&vas_pseries_mutex);
	/*
	 * Possible to lose the acquired credit with DLPAR core
	 * removal after the window is opened. So if there are any
	 * closed windows (means with lost credits), do not give new
	 * window to user space. New windows will be opened only
	 * after the existing windows are reopened when credits are
	 * available.
	 */
	if (!caps->nr_close_wins) {
		list_add(&txwin->win_list, &caps->list);
		caps->nr_open_windows++;
		mutex_unlock(&vas_pseries_mutex);
		vas_user_win_add_mm_context(&txwin->vas_win.task_ref);
		return &txwin->vas_win;
	}
	mutex_unlock(&vas_pseries_mutex);

	put_vas_user_win_ref(&txwin->vas_win.task_ref);
	rc = -EBUSY;
	pr_err("No credit is available to allocate window\n");

out_free:
	/*
	 * Window is not operational. Free IRQ before closing
	 * window so that do not have to hold mutex.
	 */
	free_irq_setup(txwin);
	h_deallocate_vas_window(txwin->vas_win.winid);
out:
	atomic_dec(&cop_feat_caps->nr_used_credits);
	kfree(txwin);
	return ERR_PTR(rc);
}
440
vas_paste_address(struct vas_window * vwin)441 static u64 vas_paste_address(struct vas_window *vwin)
442 {
443 struct pseries_vas_window *win;
444
445 win = container_of(vwin, struct pseries_vas_window, vas_win);
446 return win->win_addr;
447 }
448
deallocate_free_window(struct pseries_vas_window * win)449 static int deallocate_free_window(struct pseries_vas_window *win)
450 {
451 int rc = 0;
452
453 /*
454 * The hypervisor waits for all requests including faults
455 * are processed before closing the window - Means all
456 * credits have to be returned. In the case of fault
457 * request, a credit is returned after OS issues
458 * H_GET_NX_FAULT hcall.
459 * So free IRQ after executing H_DEALLOCATE_VAS_WINDOW
460 * hcall.
461 */
462 rc = h_deallocate_vas_window(win->vas_win.winid);
463 if (!rc)
464 free_irq_setup(win);
465
466 return rc;
467 }
468
vas_deallocate_window(struct vas_window * vwin)469 static int vas_deallocate_window(struct vas_window *vwin)
470 {
471 struct pseries_vas_window *win;
472 struct vas_cop_feat_caps *caps;
473 int rc = 0;
474
475 if (!vwin)
476 return -EINVAL;
477
478 win = container_of(vwin, struct pseries_vas_window, vas_win);
479
480 /* Should not happen */
481 if (win->win_type >= VAS_MAX_FEAT_TYPE) {
482 pr_err("Window (%u): Invalid window type %u\n",
483 vwin->winid, win->win_type);
484 return -EINVAL;
485 }
486
487 caps = &vascaps[win->win_type].caps;
488 mutex_lock(&vas_pseries_mutex);
489 /*
490 * VAS window is already closed in the hypervisor when
491 * lost the credit or with migration. So just remove the entry
492 * from the list, remove task references and free vas_window
493 * struct.
494 */
495 if (!(win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) &&
496 !(win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) {
497 rc = deallocate_free_window(win);
498 if (rc) {
499 mutex_unlock(&vas_pseries_mutex);
500 return rc;
501 }
502 } else
503 vascaps[win->win_type].nr_close_wins--;
504
505 list_del(&win->win_list);
506 atomic_dec(&caps->nr_used_credits);
507 vascaps[win->win_type].nr_open_windows--;
508 mutex_unlock(&vas_pseries_mutex);
509
510 put_vas_user_win_ref(&vwin->task_ref);
511 mm_context_remove_vas_window(vwin->task_ref.mm);
512
513 kfree(win);
514 return 0;
515 }
516
517 static const struct vas_user_win_ops vops_pseries = {
518 .open_win = vas_allocate_window, /* Open and configure window */
519 .paste_addr = vas_paste_address, /* To do copy/paste */
520 .close_win = vas_deallocate_window, /* Close window */
521 };
522
523 /*
524 * Supporting only nx-gzip coprocessor type now, but this API code
525 * extended to other coprocessor types later.
526 */
vas_register_api_pseries(struct module * mod,enum vas_cop_type cop_type,const char * name)527 int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type,
528 const char *name)
529 {
530 if (!copypaste_feat)
531 return -ENOTSUPP;
532
533 return vas_register_coproc_api(mod, cop_type, name, &vops_pseries);
534 }
535 EXPORT_SYMBOL_GPL(vas_register_api_pseries);
536
/*
 * Counterpart of vas_register_api_pseries(): unregister from the common
 * VAS coprocessor API.
 */
void vas_unregister_api_pseries(void)
{
	vas_unregister_coproc_api();
}
EXPORT_SYMBOL_GPL(vas_unregister_api_pseries);
542
543 /*
544 * Get the specific capabilities based on the feature type.
545 * Right now supports GZIP default and GZIP QoS capabilities.
546 */
get_vas_capabilities(u8 feat,enum vas_cop_feat_type type,struct hv_vas_cop_feat_caps * hv_caps)547 static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type,
548 struct hv_vas_cop_feat_caps *hv_caps)
549 {
550 struct vas_cop_feat_caps *caps;
551 struct vas_caps *vcaps;
552 int rc = 0;
553
554 vcaps = &vascaps[type];
555 memset(vcaps, 0, sizeof(*vcaps));
556 INIT_LIST_HEAD(&vcaps->list);
557
558 vcaps->feat = feat;
559 caps = &vcaps->caps;
560
561 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat,
562 (u64)virt_to_phys(hv_caps));
563 if (rc)
564 return rc;
565
566 caps->user_mode = hv_caps->user_mode;
567 if (!(caps->user_mode & VAS_COPY_PASTE_USER_MODE)) {
568 pr_err("User space COPY/PASTE is not supported\n");
569 return -ENOTSUPP;
570 }
571
572 caps->descriptor = be64_to_cpu(hv_caps->descriptor);
573 caps->win_type = hv_caps->win_type;
574 if (caps->win_type >= VAS_MAX_FEAT_TYPE) {
575 pr_err("Unsupported window type %u\n", caps->win_type);
576 return -EINVAL;
577 }
578 caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds);
579 caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds);
580 atomic_set(&caps->nr_total_credits,
581 be16_to_cpu(hv_caps->target_lpar_creds));
582 if (feat == VAS_GZIP_DEF_FEAT) {
583 caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds);
584
585 if (caps->max_win_creds < DEF_WIN_CREDS) {
586 pr_err("Window creds(%u) > max allowed window creds(%u)\n",
587 DEF_WIN_CREDS, caps->max_win_creds);
588 return -EINVAL;
589 }
590 }
591
592 rc = sysfs_add_vas_caps(caps);
593 if (rc)
594 return rc;
595
596 copypaste_feat = true;
597
598 return 0;
599 }
600
601 /*
602 * VAS windows can be closed due to lost credits when the core is
603 * removed. So reopen them if credits are available due to DLPAR
604 * core add and set the window active status. When NX sees the page
605 * fault on the unmapped paste address, the kernel handles the fault
606 * by setting the remapping to new paste address if the window is
607 * active.
608 */
reconfig_open_windows(struct vas_caps * vcaps,int creds,bool migrate)609 static int reconfig_open_windows(struct vas_caps *vcaps, int creds,
610 bool migrate)
611 {
612 long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
613 struct vas_cop_feat_caps *caps = &vcaps->caps;
614 struct pseries_vas_window *win = NULL, *tmp;
615 int rc, mv_ents = 0;
616 int flag;
617
618 /*
619 * Nothing to do if there are no closed windows.
620 */
621 if (!vcaps->nr_close_wins)
622 return 0;
623
624 /*
625 * For the core removal, the hypervisor reduces the credits
626 * assigned to the LPAR and the kernel closes VAS windows
627 * in the hypervisor depends on reduced credits. The kernel
628 * uses LIFO (the last windows that are opened will be closed
629 * first) and expects to open in the same order when credits
630 * are available.
631 * For example, 40 windows are closed when the LPAR lost 2 cores
632 * (dedicated). If 1 core is added, this LPAR can have 20 more
633 * credits. It means the kernel can reopen 20 windows. So move
634 * 20 entries in the VAS windows lost and reopen next 20 windows.
635 * For partition migration, reopen all windows that are closed
636 * during resume.
637 */
638 if ((vcaps->nr_close_wins > creds) && !migrate)
639 mv_ents = vcaps->nr_close_wins - creds;
640
641 list_for_each_entry_safe(win, tmp, &vcaps->list, win_list) {
642 if (!mv_ents)
643 break;
644
645 mv_ents--;
646 }
647
648 /*
649 * Open windows if they are closed only with migration or
650 * DLPAR (lost credit) before.
651 */
652 if (migrate)
653 flag = VAS_WIN_MIGRATE_CLOSE;
654 else
655 flag = VAS_WIN_NO_CRED_CLOSE;
656
657 list_for_each_entry_safe_from(win, tmp, &vcaps->list, win_list) {
658 /*
659 * This window is closed with DLPAR and migration events.
660 * So reopen the window with the last event.
661 * The user space is not suspended with the current
662 * migration notifier. So the user space can issue DLPAR
663 * CPU hotplug while migration in progress. In this case
664 * this window will be opened with the last event.
665 */
666 if ((win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) &&
667 (win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) {
668 win->vas_win.status &= ~flag;
669 continue;
670 }
671
672 /*
673 * Nothing to do on this window if it is not closed
674 * with this flag
675 */
676 if (!(win->vas_win.status & flag))
677 continue;
678
679 rc = allocate_setup_window(win, (u64 *)&domain[0],
680 caps->win_type);
681 if (rc)
682 return rc;
683
684 rc = h_modify_vas_window(win);
685 if (rc)
686 goto out;
687
688 mutex_lock(&win->vas_win.task_ref.mmap_mutex);
689 /*
690 * Set window status to active
691 */
692 win->vas_win.status &= ~flag;
693 mutex_unlock(&win->vas_win.task_ref.mmap_mutex);
694 win->win_type = caps->win_type;
695 if (!--vcaps->nr_close_wins)
696 break;
697 }
698
699 return 0;
700 out:
701 /*
702 * Window modify HCALL failed. So close the window to the
703 * hypervisor and return.
704 */
705 free_irq_setup(win);
706 h_deallocate_vas_window(win->vas_win.winid);
707 return rc;
708 }
709
710 /*
711 * The hypervisor reduces the available credits if the LPAR lost core. It
712 * means the excessive windows should not be active and the user space
713 * should not be using these windows to send compression requests to NX.
714 * So the kernel closes the excessive windows and unmap the paste address
715 * such that the user space receives paste instruction failure. Then up to
716 * the user space to fall back to SW compression and manage with the
717 * existing windows.
718 */
reconfig_close_windows(struct vas_caps * vcap,int excess_creds,bool migrate)719 static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds,
720 bool migrate)
721 {
722 struct pseries_vas_window *win, *tmp;
723 struct vas_user_win_ref *task_ref;
724 struct vm_area_struct *vma;
725 int rc = 0, flag;
726
727 if (migrate)
728 flag = VAS_WIN_MIGRATE_CLOSE;
729 else
730 flag = VAS_WIN_NO_CRED_CLOSE;
731
732 list_for_each_entry_safe(win, tmp, &vcap->list, win_list) {
733 /*
734 * This window is already closed due to lost credit
735 * or for migration before. Go for next window.
736 * For migration, nothing to do since this window
737 * closed for DLPAR and will be reopened even on
738 * the destination system with other DLPAR operation.
739 */
740 if ((win->vas_win.status & VAS_WIN_MIGRATE_CLOSE) ||
741 (win->vas_win.status & VAS_WIN_NO_CRED_CLOSE)) {
742 win->vas_win.status |= flag;
743 continue;
744 }
745
746 task_ref = &win->vas_win.task_ref;
747 mutex_lock(&task_ref->mmap_mutex);
748 vma = task_ref->vma;
749 /*
750 * Number of available credits are reduced, So select
751 * and close windows.
752 */
753 win->vas_win.status |= flag;
754
755 mmap_write_lock(task_ref->mm);
756 /*
757 * vma is set in the original mapping. But this mapping
758 * is done with mmap() after the window is opened with ioctl.
759 * so we may not see the original mapping if the core remove
760 * is done before the original mmap() and after the ioctl.
761 */
762 if (vma)
763 zap_page_range(vma, vma->vm_start,
764 vma->vm_end - vma->vm_start);
765
766 mmap_write_unlock(task_ref->mm);
767 mutex_unlock(&task_ref->mmap_mutex);
768 /*
769 * Close VAS window in the hypervisor, but do not
770 * free vas_window struct since it may be reused
771 * when the credit is available later (DLPAR with
772 * adding cores). This struct will be used
773 * later when the process issued with close(FD).
774 */
775 rc = deallocate_free_window(win);
776 /*
777 * This failure is from the hypervisor.
778 * No way to stop migration for these failures.
779 * So ignore error and continue closing other windows.
780 */
781 if (rc && !migrate)
782 return rc;
783
784 vcap->nr_close_wins++;
785
786 /*
787 * For migration, do not depend on lpar_creds in case if
788 * mismatch with the hypervisor value (should not happen).
789 * So close all active windows in the list and will be
790 * reopened windows based on the new lpar_creds on the
791 * destination system during resume.
792 */
793 if (!migrate && !--excess_creds)
794 break;
795 }
796
797 return 0;
798 }
799
800 /*
801 * Get new VAS capabilities when the core add/removal configuration
802 * changes. Reconfig window configurations based on the credits
803 * availability from this new capabilities.
804 */
vas_reconfig_capabilties(u8 type,int new_nr_creds)805 int vas_reconfig_capabilties(u8 type, int new_nr_creds)
806 {
807 struct vas_cop_feat_caps *caps;
808 int old_nr_creds;
809 struct vas_caps *vcaps;
810 int rc = 0, nr_active_wins;
811
812 if (type >= VAS_MAX_FEAT_TYPE) {
813 pr_err("Invalid credit type %d\n", type);
814 return -EINVAL;
815 }
816
817 vcaps = &vascaps[type];
818 caps = &vcaps->caps;
819
820 mutex_lock(&vas_pseries_mutex);
821
822 old_nr_creds = atomic_read(&caps->nr_total_credits);
823
824 atomic_set(&caps->nr_total_credits, new_nr_creds);
825 /*
826 * The total number of available credits may be decreased or
827 * increased with DLPAR operation. Means some windows have to be
828 * closed / reopened. Hold the vas_pseries_mutex so that the
829 * user space can not open new windows.
830 */
831 if (old_nr_creds < new_nr_creds) {
832 /*
833 * If the existing target credits is less than the new
834 * target, reopen windows if they are closed due to
835 * the previous DLPAR (core removal).
836 */
837 rc = reconfig_open_windows(vcaps, new_nr_creds - old_nr_creds,
838 false);
839 } else {
840 /*
841 * # active windows is more than new LPAR available
842 * credits. So close the excessive windows.
843 * On pseries, each window will have 1 credit.
844 */
845 nr_active_wins = vcaps->nr_open_windows - vcaps->nr_close_wins;
846 if (nr_active_wins > new_nr_creds)
847 rc = reconfig_close_windows(vcaps,
848 nr_active_wins - new_nr_creds,
849 false);
850 }
851
852 mutex_unlock(&vas_pseries_mutex);
853 return rc;
854 }
855
pseries_vas_dlpar_cpu(void)856 int pseries_vas_dlpar_cpu(void)
857 {
858 int new_nr_creds, rc;
859
860 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
861 vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat,
862 (u64)virt_to_phys(&hv_cop_caps));
863 if (!rc) {
864 new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
865 rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE, new_nr_creds);
866 }
867
868 if (rc)
869 pr_err("Failed reconfig VAS capabilities with DLPAR\n");
870
871 return rc;
872 }
873
874 /*
875 * Total number of default credits available (target_credits)
876 * in LPAR depends on number of cores configured. It varies based on
877 * whether processors are in shared mode or dedicated mode.
878 * Get the notifier when CPU configuration is changed with DLPAR
879 * operation so that get the new target_credits (vas default capabilities)
880 * and then update the existing windows usage if needed.
881 */
pseries_vas_notifier(struct notifier_block * nb,unsigned long action,void * data)882 static int pseries_vas_notifier(struct notifier_block *nb,
883 unsigned long action, void *data)
884 {
885 struct of_reconfig_data *rd = data;
886 struct device_node *dn = rd->dn;
887 const __be32 *intserv = NULL;
888 int len;
889
890 /*
891 * For shared CPU partition, the hypervisor assigns total credits
892 * based on entitled core capacity. So updating VAS windows will
893 * be called from lparcfg_write().
894 */
895 if (is_shared_processor())
896 return NOTIFY_OK;
897
898 if ((action == OF_RECONFIG_ATTACH_NODE) ||
899 (action == OF_RECONFIG_DETACH_NODE))
900 intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s",
901 &len);
902 /*
903 * Processor config is not changed
904 */
905 if (!intserv)
906 return NOTIFY_OK;
907
908 return pseries_vas_dlpar_cpu();
909 }
910
911 static struct notifier_block pseries_vas_nb = {
912 .notifier_call = pseries_vas_notifier,
913 };
914
915 /*
916 * For LPM, all windows have to be closed on the source partition
917 * before migration and reopen them on the destination partition
918 * after migration. So closing windows during suspend and
919 * reopen them during resume.
920 */
vas_migration_handler(int action)921 int vas_migration_handler(int action)
922 {
923 struct vas_cop_feat_caps *caps;
924 int old_nr_creds, new_nr_creds = 0;
925 struct vas_caps *vcaps;
926 int i, rc = 0;
927
928 /*
929 * NX-GZIP is not enabled. Nothing to do for migration.
930 */
931 if (!copypaste_feat)
932 return rc;
933
934 mutex_lock(&vas_pseries_mutex);
935
936 if (action == VAS_SUSPEND)
937 migration_in_progress = true;
938 else
939 migration_in_progress = false;
940
941 for (i = 0; i < VAS_MAX_FEAT_TYPE; i++) {
942 vcaps = &vascaps[i];
943 caps = &vcaps->caps;
944 old_nr_creds = atomic_read(&caps->nr_total_credits);
945
946 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
947 vcaps->feat,
948 (u64)virt_to_phys(&hv_cop_caps));
949 if (!rc) {
950 new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
951 /*
952 * Should not happen. But incase print messages, close
953 * all windows in the list during suspend and reopen
954 * windows based on new lpar_creds on the destination
955 * system.
956 */
957 if (old_nr_creds != new_nr_creds) {
958 pr_err("Target credits mismatch with the hypervisor\n");
959 pr_err("state(%d): lpar creds: %d HV lpar creds: %d\n",
960 action, old_nr_creds, new_nr_creds);
961 pr_err("Used creds: %d, Active creds: %d\n",
962 atomic_read(&caps->nr_used_credits),
963 vcaps->nr_open_windows - vcaps->nr_close_wins);
964 }
965 } else {
966 pr_err("state(%d): Get VAS capabilities failed with %d\n",
967 action, rc);
968 /*
969 * We can not stop migration with the current lpm
970 * implementation. So continue closing all windows in
971 * the list (during suspend) and return without
972 * opening windows (during resume) if VAS capabilities
973 * HCALL failed.
974 */
975 if (action == VAS_RESUME)
976 goto out;
977 }
978
979 switch (action) {
980 case VAS_SUSPEND:
981 rc = reconfig_close_windows(vcaps, vcaps->nr_open_windows,
982 true);
983 break;
984 case VAS_RESUME:
985 atomic_set(&caps->nr_total_credits, new_nr_creds);
986 rc = reconfig_open_windows(vcaps, new_nr_creds, true);
987 break;
988 default:
989 /* should not happen */
990 pr_err("Invalid migration action %d\n", action);
991 rc = -EINVAL;
992 goto out;
993 }
994
995 /*
996 * Ignore errors during suspend and return for resume.
997 */
998 if (rc && (action == VAS_RESUME))
999 goto out;
1000 }
1001
1002 out:
1003 mutex_unlock(&vas_pseries_mutex);
1004 return rc;
1005 }
1006
pseries_vas_init(void)1007 static int __init pseries_vas_init(void)
1008 {
1009 struct hv_vas_all_caps *hv_caps;
1010 int rc = 0;
1011
1012 /*
1013 * Linux supports user space COPY/PASTE only with Radix
1014 */
1015 if (!radix_enabled()) {
1016 pr_err("API is supported only with radix page tables\n");
1017 return -ENOTSUPP;
1018 }
1019
1020 hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL);
1021 if (!hv_caps)
1022 return -ENOMEM;
1023 /*
1024 * Get VAS overall capabilities by passing 0 to feature type.
1025 */
1026 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 0,
1027 (u64)virt_to_phys(hv_caps));
1028 if (rc)
1029 goto out;
1030
1031 caps_all.descriptor = be64_to_cpu(hv_caps->descriptor);
1032 caps_all.feat_type = be64_to_cpu(hv_caps->feat_type);
1033
1034 sysfs_pseries_vas_init(&caps_all);
1035
1036 /*
1037 * QOS capabilities available
1038 */
1039 if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) {
1040 rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT,
1041 VAS_GZIP_QOS_FEAT_TYPE, &hv_cop_caps);
1042
1043 if (rc)
1044 goto out;
1045 }
1046 /*
1047 * Default capabilities available
1048 */
1049 if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT)
1050 rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT,
1051 VAS_GZIP_DEF_FEAT_TYPE, &hv_cop_caps);
1052
1053 if (!rc && copypaste_feat) {
1054 if (firmware_has_feature(FW_FEATURE_LPAR))
1055 of_reconfig_notifier_register(&pseries_vas_nb);
1056
1057 pr_info("GZIP feature is available\n");
1058 } else {
1059 /*
1060 * Should not happen, but only when get default
1061 * capabilities HCALL failed. So disable copy paste
1062 * feature.
1063 */
1064 copypaste_feat = false;
1065 }
1066
1067 out:
1068 kfree(hv_caps);
1069 return rc;
1070 }
1071 machine_device_initcall(pseries, pseries_vas_init);
1072