1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Support for Partition Mobility/Migration
4 *
5 * Copyright (C) 2010 Nathan Fontenot
6 * Copyright (C) 2010 IBM Corporation
7 */
8
9
10 #define pr_fmt(fmt) "mobility: " fmt
11
12 #include <linux/cpu.h>
13 #include <linux/kernel.h>
14 #include <linux/kobject.h>
15 #include <linux/nmi.h>
16 #include <linux/sched.h>
17 #include <linux/smp.h>
18 #include <linux/stat.h>
19 #include <linux/stop_machine.h>
20 #include <linux/completion.h>
21 #include <linux/device.h>
22 #include <linux/delay.h>
23 #include <linux/slab.h>
24 #include <linux/stringify.h>
25
26 #include <asm/machdep.h>
27 #include <asm/rtas.h>
28 #include "pseries.h"
29 #include "vas.h" /* vas_migration_handler() */
30 #include "../../kernel/cacheinfo.h"
31
/* kobject named "mobility", created under kernel_kobj by mobility_sysfs_init(). */
static struct kobject *mobility_kobj;
33
/*
 * Header located at the start of the work buffer that update_dt_node()
 * exchanges with the ibm,update-properties RTAS call. The property
 * descriptors returned by the call are parsed starting right after this
 * header. All fields are stored big-endian.
 */
struct update_props_workarea {
	__be32 phandle;		/* phandle of the node whose properties are updated */
	__be32 state;		/* never written directly by this file */
	__be64 reserved;
	__be32 nprops;		/* number of property descriptors in the buffer */
} __packed;
40
/*
 * Layout of an action word in the ibm,update-nodes buffer as parsed by
 * pseries_devicetree_update(): the top byte selects the action and the
 * low 24 bits give the number of node entries that follow.
 */
#define NODE_ACTION_MASK	0xff000000
#define NODE_COUNT_MASK		0x00ffffff

/* Action codes, already positioned in the NODE_ACTION_MASK byte. */
#define DELETE_DT_NODE	0x01000000
#define UPDATE_DT_NODE	0x02000000
#define ADD_DT_NODE	0x03000000
47
/*
 * Scope argument values for the device tree update RTAS calls.
 * MIGRATION_SCOPE is what post_mobility_fixup() passes down; PRRN_SCOPE
 * is not referenced by the code visible in this file. Both expansions are
 * parenthesised so they behave as a single operand in any expression.
 */
#define MIGRATION_SCOPE	(1)
#define PRRN_SCOPE	(-2)
50
51 #ifdef CONFIG_PPC_WATCHDOG
/*
 * Value handed to watchdog_nmi_set_timeout_pct() by
 * pseries_migrate_partition() before the suspend attempt (0 skips the
 * adjustment). Exposed through the "nmi_wd_lpm_factor" sysctl below.
 */
static unsigned int nmi_wd_lpm_factor = 200;
53
54 #ifdef CONFIG_SYSCTL
/* "nmi_wd_lpm_factor" entry: mode 0644 access to the nmi_wd_lpm_factor variable. */
static struct ctl_table nmi_wd_lpm_factor_ctl_table[] = {
	{
		.procname = "nmi_wd_lpm_factor",
		.data = &nmi_wd_lpm_factor,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_douintvec_minmax,
	},
	{}
};
/* "kernel" directory entry whose child is the table above. */
static struct ctl_table nmi_wd_lpm_factor_sysctl_root[] = {
	{
		.procname = "kernel",
		.mode = 0555,
		.child = nmi_wd_lpm_factor_ctl_table,
	},
	{}
};
73
register_nmi_wd_lpm_factor_sysctl(void)74 static int __init register_nmi_wd_lpm_factor_sysctl(void)
75 {
76 register_sysctl_table(nmi_wd_lpm_factor_sysctl_root);
77
78 return 0;
79 }
80 device_initcall(register_nmi_wd_lpm_factor_sysctl);
81 #endif /* CONFIG_SYSCTL */
82 #endif /* CONFIG_PPC_WATCHDOG */
83
/*
 * Issue an RTAS call that takes a work buffer and a scope argument.
 *
 * @token: RTAS token of the service to invoke.
 * @buf:   caller buffer; RTAS_DATA_BUF_SIZE bytes are copied from it into
 *         rtas_data_buf before the call and copied back afterwards.
 * @scope: scope argument forwarded to the call.
 *
 * The shared rtas_data_buf is used under rtas_data_buf_lock for the whole
 * copy-in / call / copy-out sequence.
 *
 * Returns the value returned by rtas_call().
 */
static int mobility_rtas_call(int token, char *buf, s32 scope)
{
	int status;

	spin_lock(&rtas_data_buf_lock);
	memcpy(rtas_data_buf, buf, RTAS_DATA_BUF_SIZE);
	status = rtas_call(token, 2, 1, NULL, rtas_data_buf, scope);
	memcpy(buf, rtas_data_buf, RTAS_DATA_BUF_SIZE);
	spin_unlock(&rtas_data_buf_lock);

	return status;
}
97
delete_dt_node(struct device_node * dn)98 static int delete_dt_node(struct device_node *dn)
99 {
100 struct device_node *pdn;
101 bool is_platfac;
102
103 pdn = of_get_parent(dn);
104 is_platfac = of_node_is_type(dn, "ibm,platform-facilities") ||
105 of_node_is_type(pdn, "ibm,platform-facilities");
106 of_node_put(pdn);
107
108 /*
109 * The drivers that bind to nodes in the platform-facilities
110 * hierarchy don't support node removal, and the removal directive
111 * from firmware is always followed by an add of an equivalent
112 * node. The capability (e.g. RNG, encryption, compression)
113 * represented by the node is never interrupted by the migration.
114 * So ignore changes to this part of the tree.
115 */
116 if (is_platfac) {
117 pr_notice("ignoring remove operation for %pOFfp\n", dn);
118 return 0;
119 }
120
121 pr_debug("removing node %pOFfp\n", dn);
122 dlpar_detach_node(dn);
123 return 0;
124 }
125
/*
 * Apply one property value chunk returned by ibm,update-properties.
 *
 * @dn:    node being updated.
 * @prop:  in/out; points at the property being assembled across chunks.
 *         NULL on entry means a new property is started. It is reset to
 *         NULL once the finished property was passed to
 *         of_update_property().
 * @name:  property name (duplicated when a new property is started).
 * @vd:    value descriptor: the chunk length, or its two's complement
 *         when more chunks of the same value follow.
 * @value: chunk data.
 *
 * Returns 0 on success or -ENOMEM if an allocation fails.
 */
static int update_dt_property(struct device_node *dn, struct property **prop,
			      const char *name, u32 vd, char *value)
{
	struct property *pending = *prop;
	bool partial = false;

	/*
	 * A "negative" descriptor means the buffer only holds part of the
	 * value and ibm,update-properties has to be called again for the
	 * rest. The real chunk length is the two's complement of vd.
	 */
	if (vd & 0x80000000) {
		vd = ~vd + 1;
		partial = true;
	}

	if (!pending) {
		/* First (or only) chunk: build a fresh property. */
		pending = kzalloc(sizeof(*pending), GFP_KERNEL);
		if (!pending)
			return -ENOMEM;

		pending->name = kstrdup(name, GFP_KERNEL);
		if (!pending->name)
			goto err_free_prop;

		pending->length = vd;
		pending->value = kzalloc(pending->length, GFP_KERNEL);
		if (!pending->value)
			goto err_free_name;

		memcpy(pending->value, value, vd);
		*prop = pending;
	} else {
		/* Continuation chunk: append to the value gathered so far. */
		char *merged = kzalloc(pending->length + vd, GFP_KERNEL);

		if (!merged)
			return -ENOMEM;

		memcpy(merged, pending->value, pending->length);
		memcpy(merged + pending->length, value, vd);

		kfree(pending->value);
		pending->value = merged;
		pending->length += vd;
	}

	if (partial)
		return 0;

	pr_debug("updating node %pOF property %s\n", dn, name);
	of_update_property(dn, pending);
	*prop = NULL;

	return 0;

err_free_name:
	kfree(pending->name);
err_free_prop:
	kfree(pending);
	return -ENOMEM;
}
186
/*
 * Fetch and apply the property updates firmware reports for @dn.
 *
 * ibm,update-properties is called repeatedly (while it returns 1) with a
 * work buffer that starts with a struct update_props_workarea followed by
 * property descriptors. Each descriptor is a NUL-terminated name, a
 * big-endian 32-bit value descriptor and, for value-carrying descriptors,
 * the value bytes.
 *
 * @dn:    node to update.
 * @scope: scope argument forwarded to the RTAS call.
 *
 * Returns -EINVAL if the RTAS service is unknown, -ENOMEM if the work
 * buffer cannot be allocated and 0 otherwise. Failures of the RTAS call
 * or of individual property updates do not change the return value.
 */
static int update_dt_node(struct device_node *dn, s32 scope)
{
	struct update_props_workarea *upwa;
	struct property *prop = NULL;
	int i, rc, rtas_rc;
	char *prop_data;
	char *rtas_buf;
	int update_properties_token;
	u32 nprops;
	u32 vd;

	update_properties_token = rtas_token("ibm,update-properties");
	if (update_properties_token == RTAS_UNKNOWN_SERVICE)
		return -EINVAL;

	rtas_buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL);
	if (!rtas_buf)
		return -ENOMEM;

	upwa = (struct update_props_workarea *)&rtas_buf[0];
	upwa->phandle = cpu_to_be32(dn->phandle);

	do {
		rtas_rc = mobility_rtas_call(update_properties_token, rtas_buf,
					     scope);
		if (rtas_rc < 0)
			break;

		prop_data = rtas_buf + sizeof(*upwa);
		nprops = be32_to_cpu(upwa->nprops);

		/* On the first call to ibm,update-properties for a node the
		 * first property value descriptor contains an empty
		 * property name, the property value length encoded as u32,
		 * and the property value is the node path being updated.
		 */
		if (*prop_data == 0) {
			prop_data++;
			vd = be32_to_cpu(*(__be32 *)prop_data);
			prop_data += vd + sizeof(vd);
			nprops--;
		}

		for (i = 0; i < nprops; i++) {
			char *prop_name;

			prop_name = prop_data;
			prop_data += strlen(prop_name) + 1;
			vd = be32_to_cpu(*(__be32 *)prop_data);
			prop_data += sizeof(vd);

			switch (vd) {
			case 0x00000000:
				/* name only property, nothing to do */
				break;

			case 0x80000000:
				of_remove_property(dn, of_find_property(dn,
							prop_name, NULL));
				prop = NULL;
				break;

			default:
				rc = update_dt_property(dn, &prop, prop_name,
							vd, prop_data);
				if (rc) {
					pr_err("updating %s property failed: %d\n",
					       prop_name, rc);
				}

				/*
				 * A descriptor with the top bit set is the
				 * two's complement of the chunk length (see
				 * update_dt_property()). Step over the bytes
				 * actually present rather than the raw
				 * descriptor, which would move the cursor
				 * far outside the buffer.
				 */
				if (vd & 0x80000000)
					vd = ~vd + 1;

				prop_data += vd;
				break;
			}

			cond_resched();
		}

		cond_resched();
	} while (rtas_rc == 1);

	/*
	 * A property that was still being assembled from partial chunks
	 * when the loop ended was never handed to of_update_property(),
	 * so it is still owned here and must be released.
	 */
	if (prop) {
		kfree(prop->value);
		kfree(prop->name);
		kfree(prop);
	}

	kfree(rtas_buf);
	return 0;
}
270
/*
 * Configure the connector identified by @drc_index and attach the
 * resulting node below @parent_dn.
 *
 * @parent_dn: node the new node is attached to.
 * @drc_index: DRC index, passed through to dlpar_configure_connector()
 *             as received from firmware.
 *
 * Returns -ENOENT if dlpar_configure_connector() yields no node, 0 if the
 * node was attached or deliberately ignored, or the error returned by
 * dlpar_attach_node().
 */
static int add_dt_node(struct device_node *parent_dn, __be32 drc_index)
{
	struct device_node *dn;
	int rc;

	dn = dlpar_configure_connector(drc_index, parent_dn);
	if (!dn)
		return -ENOENT;

	/*
	 * Since delete_dt_node() ignores this node type, this is the
	 * necessary counterpart. We also know that a platform-facilities
	 * node returned from dlpar_configure_connector() has children
	 * attached, and dlpar_attach_node() only adds the parent, leaking
	 * the children. So ignore these on the add side for now.
	 */
	if (of_node_is_type(dn, "ibm,platform-facilities")) {
		pr_notice("ignoring add operation for %pOF\n", dn);
		dlpar_free_cc_nodes(dn);
		return 0;
	}

	rc = dlpar_attach_node(dn, parent_dn);
	if (rc) {
		/*
		 * The node has been released; return right away so it is
		 * neither dereferenced for the debug message below nor
		 * reported as added.
		 */
		dlpar_free_cc_nodes(dn);
		return rc;
	}

	pr_debug("added node %pOFfp\n", dn);

	return 0;
}
301
/*
 * Walk the node changes reported by ibm,update-nodes and apply them.
 *
 * The RTAS call is repeated while it returns 1. After each call the
 * buffer is parsed from its fifth 32-bit word onwards as a sequence of
 * action words (action in the top byte, node count in the low 24 bits),
 * each followed by one entry per node. An entry is a phandle; entries of
 * an ADD_DT_NODE action carry an additional drc-index word, and their
 * phandle names the parent the new node is attached to.
 *
 * @scope: scope argument forwarded to the RTAS calls.
 *
 * Returns 0 if the ibm,update-nodes service does not exist, -ENOMEM if
 * the work buffer cannot be allocated, otherwise the result of the last
 * RTAS call. Failures of individual node operations are not propagated.
 */
static int pseries_devicetree_update(s32 scope)
{
	char *rtas_buf;
	__be32 *data;
	int update_nodes_token;
	int rc;

	update_nodes_token = rtas_token("ibm,update-nodes");
	if (update_nodes_token == RTAS_UNKNOWN_SERVICE)
		return 0;

	rtas_buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL);
	if (!rtas_buf)
		return -ENOMEM;

	do {
		rc = mobility_rtas_call(update_nodes_token, rtas_buf, scope);
		if (rc && rc != 1)
			break;

		/* Skip the first four 32-bit words of the buffer. */
		data = (__be32 *)rtas_buf + 4;
		while (be32_to_cpu(*data) & NODE_ACTION_MASK) {
			int i;
			u32 action = be32_to_cpu(*data) & NODE_ACTION_MASK;
			u32 node_count = be32_to_cpu(*data) & NODE_COUNT_MASK;

			data++;

			for (i = 0; i < node_count; i++) {
				struct device_node *np;
				__be32 phandle = *data++;
				__be32 drc_index;

				np = of_find_node_by_phandle(be32_to_cpu(phandle));
				if (!np) {
					pr_warn("Failed lookup: phandle 0x%x for action 0x%x\n",
						be32_to_cpu(phandle), action);
					/*
					 * An add entry has a drc-index word
					 * after the phandle. Consume it here
					 * as well, otherwise it would be
					 * taken for the next phandle and the
					 * rest of the buffer would be parsed
					 * out of step.
					 */
					if (action == ADD_DT_NODE)
						data++;
					continue;
				}

				switch (action) {
				case DELETE_DT_NODE:
					delete_dt_node(np);
					break;
				case UPDATE_DT_NODE:
					update_dt_node(np, scope);
					break;
				case ADD_DT_NODE:
					drc_index = *data++;
					add_dt_node(np, drc_index);
					break;
				}

				of_node_put(np);
				cond_resched();
			}
		}

		cond_resched();
	} while (rc == 1);

	kfree(rtas_buf);
	return rc;
}
366
post_mobility_fixup(void)367 void post_mobility_fixup(void)
368 {
369 int rc;
370
371 rtas_activate_firmware();
372
373 /*
374 * We don't want CPUs to go online/offline while the device
375 * tree is being updated.
376 */
377 cpus_read_lock();
378
379 /*
380 * It's common for the destination firmware to replace cache
381 * nodes. Release all of the cacheinfo hierarchy's references
382 * before updating the device tree.
383 */
384 cacheinfo_teardown();
385
386 rc = pseries_devicetree_update(MIGRATION_SCOPE);
387 if (rc)
388 pr_err("device tree update failed: %d\n", rc);
389
390 cacheinfo_rebuild();
391
392 cpus_read_unlock();
393
394 /* Possibly switch to a new L1 flush type */
395 pseries_setup_security_mitigations();
396
397 /* Reinitialise system information for hv-24x7 */
398 read_24x7_sys_info();
399
400 return;
401 }
402
/*
 * Query the state of the VASI stream @handle with H_VASI_STATE.
 *
 * On success the state is stored in *@res and 0 is returned; *@res is
 * left untouched otherwise. H_PARAMETER maps to -EINVAL, H_FUNCTION to
 * -EOPNOTSUPP, and any other hypervisor status is logged and mapped to
 * -EIO.
 */
static int poll_vasi_state(u64 handle, unsigned long *res)
{
	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
	long hvrc = plpar_hcall(H_VASI_STATE, retbuf, handle);

	switch (hvrc) {
	case H_SUCCESS:
		*res = retbuf[0];
		return 0;
	case H_PARAMETER:
		return -EINVAL;
	case H_FUNCTION:
		return -EOPNOTSUPP;
	case H_HARDWARE:
	default:
		pr_err("unexpected H_VASI_STATE result %ld\n", hvrc);
		return -EIO;
	}
}
429
/*
 * Poll once per second until the VASI stream @handle moves from
 * H_VASI_ENABLED to H_VASI_SUSPENDING.
 *
 * Returns 0 once the stream is suspending, or when H_VASI_STATE is not
 * supported (-EOPNOTSUPP from the poll). Any other poll error is
 * returned as is, and any other stream state yields -EIO.
 */
static int wait_for_vasi_session_suspending(u64 handle)
{
	unsigned long state;
	int ret;

	for (;;) {
		ret = poll_vasi_state(handle, &state);
		if (ret)
			break;

		if (state == H_VASI_SUSPENDING)
			break;

		if (state != H_VASI_ENABLED) {
			pr_err("unexpected H_VASI_STATE result %lu\n", state);
			ret = -EIO;
			break;
		}

		ssleep(1);
	}

	/*
	 * Carry on even when H_VASI_STATE is unavailable. Should H_JOIN or
	 * ibm,suspend-me turn out to be unimplemented too, the failure is
	 * handled at that point.
	 */
	return ret == -EOPNOTSUPP ? 0 : ret;
}
462
/*
 * Poll every 500 ms until the VASI stream @handle leaves H_VASI_RESUMED,
 * i.e. until the memory transfer has finished.
 *
 * Stops on completion, on a poll error, or on an unexpected stream
 * state; the latter two are only logged.
 */
static void wait_for_vasi_session_completed(u64 handle)
{
	unsigned long state = 0;
	int ret;

	pr_info("waiting for memory transfer to complete...\n");

	for (;;) {
		ret = poll_vasi_state(handle, &state);

		/*
		 * When the transfer has already completed and the hypervisor
		 * has cleaned up the migration, H_PARAMETER is returned,
		 * which poll_vasi_state() translates to -EINVAL. Treat that
		 * the same as H_VASI_COMPLETED.
		 */
		if (ret == -EINVAL || (ret == 0 && state == H_VASI_COMPLETED)) {
			pr_info("memory transfer completed.\n");
			return;
		}

		if (ret != 0) {
			pr_err("H_VASI_STATE return error (%d)\n", ret);
			return;
		}

		if (state != H_VASI_RESUMED) {
			pr_err("unexpected H_VASI_STATE result %lu\n", state);
			return;
		}

		msleep(500);
	}
}
499
prod_single(unsigned int target_cpu)500 static void prod_single(unsigned int target_cpu)
501 {
502 long hvrc;
503 int hwid;
504
505 hwid = get_hard_smp_processor_id(target_cpu);
506 hvrc = plpar_hcall_norets(H_PROD, hwid);
507 if (hvrc == H_SUCCESS)
508 return;
509 pr_err_ratelimited("H_PROD of CPU %u (hwid %d) error: %ld\n",
510 target_cpu, hwid, hvrc);
511 }
512
prod_others(void)513 static void prod_others(void)
514 {
515 unsigned int cpu;
516
517 for_each_online_cpu(cpu) {
518 if (cpu != smp_processor_id())
519 prod_single(cpu);
520 }
521 }
522
clamp_slb_size(void)523 static u16 clamp_slb_size(void)
524 {
525 #ifdef CONFIG_PPC_64S_HASH_MMU
526 u16 prev = mmu_slb_size;
527
528 slb_set_size(SLB_MIN_SIZE);
529
530 return prev;
531 #else
532 return 0;
533 #endif
534 }
535
do_suspend(void)536 static int do_suspend(void)
537 {
538 u16 saved_slb_size;
539 int status;
540 int ret;
541
542 pr_info("calling ibm,suspend-me on CPU %i\n", smp_processor_id());
543
544 /*
545 * The destination processor model may have fewer SLB entries
546 * than the source. We reduce mmu_slb_size to a safe minimum
547 * before suspending in order to minimize the possibility of
548 * programming non-existent entries on the destination. If
549 * suspend fails, we restore it before returning. On success
550 * the OF reconfig path will update it from the new device
551 * tree after resuming on the destination.
552 */
553 saved_slb_size = clamp_slb_size();
554
555 ret = rtas_ibm_suspend_me(&status);
556 if (ret != 0) {
557 pr_err("ibm,suspend-me error: %d\n", status);
558 slb_set_size(saved_slb_size);
559 }
560
561 return ret;
562 }
563
/**
 * struct pseries_suspend_info - State shared between CPUs for join/suspend.
 * @counter: Threads are to increment this upon resuming from suspend
 *           or if an error is received from H_JOIN. The thread which performs
 *           the first increment (i.e. sets it to 1) is responsible for
 *           waking the other threads.
 * @done: False if join/suspend is in progress. True if the operation is
 *        complete (successful or not). Accessed with READ_ONCE()/WRITE_ONCE()
 *        in do_join().
 */
struct pseries_suspend_info {
	atomic_t counter;
	bool done;
};
577
do_join(void * arg)578 static int do_join(void *arg)
579 {
580 struct pseries_suspend_info *info = arg;
581 atomic_t *counter = &info->counter;
582 long hvrc;
583 int ret;
584
585 retry:
586 /* Must ensure MSR.EE off for H_JOIN. */
587 hard_irq_disable();
588 hvrc = plpar_hcall_norets(H_JOIN);
589
590 switch (hvrc) {
591 case H_CONTINUE:
592 /*
593 * All other CPUs are offline or in H_JOIN. This CPU
594 * attempts the suspend.
595 */
596 ret = do_suspend();
597 break;
598 case H_SUCCESS:
599 /*
600 * The suspend is complete and this cpu has received a
601 * prod, or we've received a stray prod from unrelated
602 * code (e.g. paravirt spinlocks) and we need to join
603 * again.
604 *
605 * This barrier orders the return from H_JOIN above vs
606 * the load of info->done. It pairs with the barrier
607 * in the wakeup/prod path below.
608 */
609 smp_mb();
610 if (READ_ONCE(info->done) == false) {
611 pr_info_ratelimited("premature return from H_JOIN on CPU %i, retrying",
612 smp_processor_id());
613 goto retry;
614 }
615 ret = 0;
616 break;
617 case H_BAD_MODE:
618 case H_HARDWARE:
619 default:
620 ret = -EIO;
621 pr_err_ratelimited("H_JOIN error %ld on CPU %i\n",
622 hvrc, smp_processor_id());
623 break;
624 }
625
626 if (atomic_inc_return(counter) == 1) {
627 pr_info("CPU %u waking all threads\n", smp_processor_id());
628 WRITE_ONCE(info->done, true);
629 /*
630 * This barrier orders the store to info->done vs subsequent
631 * H_PRODs to wake the other CPUs. It pairs with the barrier
632 * in the H_SUCCESS case above.
633 */
634 smp_mb();
635 prod_others();
636 }
637 /*
638 * Execution may have been suspended for several seconds, so
639 * reset the watchdog.
640 */
641 touch_nmi_watchdog();
642 return ret;
643 }
644
/*
 * Abort reason code byte 0: the entity aborting the migration.
 * pseries_cancel_migration() places this value in bits 31..24 of the
 * reason code it sends. We use only the 'Migrating partition' value.
 */
enum vasi_aborting_entity {
	ORCHESTRATOR = 1,
	VSP_SOURCE = 2,
	PARTITION_FIRMWARE = 3,
	PLATFORM_FIRMWARE = 4,
	VSP_TARGET = 5,
	MIGRATING_PARTITION = 6,
};
656
/*
 * Ask the hypervisor to cancel the migration of VASI stream @handle.
 *
 * The reason code combines MIGRATING_PARTITION in the top byte with the
 * low 24 bits of the absolute value of @err. A failing H_VASI_SIGNAL is
 * only logged.
 */
static void pseries_cancel_migration(u64 handle, int err)
{
	const u8 entity = MIGRATING_PARTITION;
	const u32 detail = abs(err) & 0xffffff;
	const u32 reason_code = (entity << 24) | detail;
	long hvrc;

	hvrc = plpar_hcall_norets(H_VASI_SIGNAL, handle,
				  H_VASI_SIGNAL_CANCEL, reason_code);
	if (hvrc != 0)
		pr_err("H_VASI_SIGNAL error: %ld\n", hvrc);
}
673
/*
 * Suspend the partition, retrying on failure.
 *
 * do_join() is run on all online CPUs via stop_machine(). If it fails,
 * the attempt is repeated up to max_attempts times in total, with a
 * delay that starts at 1 ms and grows tenfold per retry, as long as the
 * VASI stream @handle is still in H_VASI_SUSPENDING state (or its state
 * cannot be queried because H_VASI_STATE is unsupported).
 *
 * Returns 0 on success or the error from the last suspend attempt.
 */
static int pseries_suspend(u64 handle)
{
	const unsigned int max_attempts = 5;
	unsigned int retry_interval_ms = 1;
	unsigned int attempt = 1;
	int ret;

	while (true) {
		struct pseries_suspend_info info;
		unsigned long vasi_state;
		int vasi_err;

		info = (struct pseries_suspend_info) {
			.counter = ATOMIC_INIT(0),
			.done = false,
		};

		ret = stop_machine(do_join, &info, cpu_online_mask);
		if (ret == 0)
			break;
		/*
		 * Encountered an error. If the VASI stream is still
		 * in Suspending state, it's likely a transient
		 * condition related to some device in the partition
		 * and we can retry in the hope that the cause has
		 * cleared after some delay.
		 *
		 * A better design would allow drivers etc to prepare
		 * for the suspend and avoid conditions which prevent
		 * the suspend from succeeding. For now, we have this
		 * mitigation.
		 */
		pr_notice("Partition suspend attempt %u of %u error: %d\n",
			  attempt, max_attempts, ret);

		if (attempt == max_attempts)
			break;

		vasi_err = poll_vasi_state(handle, &vasi_state);
		if (vasi_err == 0) {
			if (vasi_state != H_VASI_SUSPENDING) {
				pr_notice("VASI state %lu after failed suspend\n",
					  vasi_state);
				break;
			}
		} else if (vasi_err != -EOPNOTSUPP) {
			pr_err("VASI state poll error: %d\n", vasi_err);
			break;
		}

		pr_notice("Will retry partition suspend after %u ms\n",
			  retry_interval_ms);

		msleep(retry_interval_ms);
		retry_interval_ms *= 10;
		attempt++;
	}

	return ret;
}
734
/*
 * Run a complete partition migration for VASI stream @handle.
 *
 * VAS windows are closed first and reopened at the end regardless of the
 * outcome. Once the stream is suspending, the partition is suspended; on
 * success the post-migration fixups run and the function waits for the
 * memory transfer to finish, on failure the migration is cancelled. With
 * CONFIG_PPC_WATCHDOG and a non-zero nmi_wd_lpm_factor, the factor is
 * passed to watchdog_nmi_set_timeout_pct() for that phase and reset to 0
 * afterwards.
 *
 * Returns 0 on success or a negative errno.
 */
static int pseries_migrate_partition(u64 handle)
{
	unsigned int factor = 0;
	int ret;

#ifdef CONFIG_PPC_WATCHDOG
	factor = nmi_wd_lpm_factor;
#endif
	/*
	 * When a migration starts, the hypervisor changes the VAS mappings
	 * before the OS is notified and has closed all VAS windows. During
	 * that time NX generates continuous faults which user space cannot
	 * tell apart from the migration event. Closing the VAS windows
	 * right at the start keeps that window short.
	 */
	vas_migration_handler(VAS_SUSPEND);

	ret = wait_for_vasi_session_suspending(handle);
	if (ret == 0) {
		if (factor)
			watchdog_nmi_set_timeout_pct(factor);

		ret = pseries_suspend(handle);
		if (ret) {
			pseries_cancel_migration(handle, ret);
		} else {
			post_mobility_fixup();
			/*
			 * Only return to the user space process once the
			 * memory transfer has completed, so that the user
			 * hooks run at the right time.
			 */
			wait_for_vasi_session_completed(handle);
		}

		if (factor)
			watchdog_nmi_set_timeout_pct(0);
	}

	vas_migration_handler(VAS_RESUME);

	return ret;
}
781
/*
 * Non-static entry point that forwards @handle to
 * pseries_migrate_partition() and returns its result unchanged.
 */
int rtas_syscall_dispatch_ibm_suspend_me(u64 handle)
{
	return pseries_migrate_partition(handle);
}
786
migration_store(struct class * class,struct class_attribute * attr,const char * buf,size_t count)787 static ssize_t migration_store(struct class *class,
788 struct class_attribute *attr, const char *buf,
789 size_t count)
790 {
791 u64 streamid;
792 int rc;
793
794 rc = kstrtou64(buf, 0, &streamid);
795 if (rc)
796 return rc;
797
798 rc = pseries_migrate_partition(streamid);
799 if (rc)
800 return rc;
801
802 return count;
803 }
804
/*
 * Used by drmgr to determine the kernel behavior of the migration interface.
 *
 * Version 1: Performs all PAPR requirements for migration including
 * firmware activation and device tree update.
 */
#define MIGRATION_API_VERSION	1

/*
 * Attributes added to the "mobility" kobject by mobility_sysfs_init():
 * a write-only "migration" file handled by migration_store() and a
 * read-only (0444) "api_version" string holding MIGRATION_API_VERSION.
 */
static CLASS_ATTR_WO(migration);
static CLASS_ATTR_STRING(api_version, 0444, __stringify(MIGRATION_API_VERSION));
815
mobility_sysfs_init(void)816 static int __init mobility_sysfs_init(void)
817 {
818 int rc;
819
820 mobility_kobj = kobject_create_and_add("mobility", kernel_kobj);
821 if (!mobility_kobj)
822 return -ENOMEM;
823
824 rc = sysfs_create_file(mobility_kobj, &class_attr_migration.attr);
825 if (rc)
826 pr_err("unable to create migration sysfs file (%d)\n", rc);
827
828 rc = sysfs_create_file(mobility_kobj, &class_attr_api_version.attr.attr);
829 if (rc)
830 pr_err("unable to create api_version sysfs file (%d)\n", rc);
831
832 return 0;
833 }
834 machine_device_initcall(pseries, mobility_sysfs_init);
835