1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright IBM Corp. 2011
4  * Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
5  */
6 #include <linux/hugetlb.h>
7 #include <linux/proc_fs.h>
8 #include <linux/vmalloc.h>
9 #include <linux/mm.h>
10 #include <asm/cacheflush.h>
11 #include <asm/facility.h>
12 #include <asm/pgalloc.h>
13 #include <asm/kfence.h>
14 #include <asm/page.h>
15 #include <asm/set_memory.h>
16 
sske_frame(unsigned long addr,unsigned char skey)17 static inline unsigned long sske_frame(unsigned long addr, unsigned char skey)
18 {
19 	asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],1,0"
20 		     : [addr] "+a" (addr) : [skey] "d" (skey));
21 	return addr;
22 }
23 
__storage_key_init_range(unsigned long start,unsigned long end)24 void __storage_key_init_range(unsigned long start, unsigned long end)
25 {
26 	unsigned long boundary, size;
27 
28 	while (start < end) {
29 		if (MACHINE_HAS_EDAT1) {
30 			/* set storage keys for a 1MB frame */
31 			size = 1UL << 20;
32 			boundary = (start + size) & ~(size - 1);
33 			if (boundary <= end) {
34 				do {
35 					start = sske_frame(start, PAGE_DEFAULT_KEY);
36 				} while (start < boundary);
37 				continue;
38 			}
39 		}
40 		page_set_storage_key(start, PAGE_DEFAULT_KEY, 1);
41 		start += PAGE_SIZE;
42 	}
43 }
44 
#ifdef CONFIG_PROC_FS
/* One counter per direct mapping size, indexed by PG_DIRECT_MAP_*. */
atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);

/*
 * Print the DirectMap4k/DirectMap1M/DirectMap2G lines to @m.
 *
 * Each counter is scaled to kB by a shift that matches the mapping size:
 * 4k = 2^2 kB, 1M = 2^10 kB, 2G = 2^21 kB.
 */
void arch_report_meminfo(struct seq_file *m)
{
	long nr_4k, nr_1m, nr_2g;

	nr_4k = atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_4K]);
	seq_printf(m, "DirectMap4k:    %8lu kB\n", nr_4k << 2);

	nr_1m = atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_1M]);
	seq_printf(m, "DirectMap1M:    %8lu kB\n", nr_1m << 10);

	nr_2g = atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_2G]);
	seq_printf(m, "DirectMap2G:    %8lu kB\n", nr_2g << 21);
}
#endif /* CONFIG_PROC_FS */
58 
pgt_set(unsigned long * old,unsigned long new,unsigned long addr,unsigned long dtt)59 static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr,
60 		    unsigned long dtt)
61 {
62 	unsigned long *table, mask;
63 
64 	mask = 0;
65 	if (MACHINE_HAS_EDAT2) {
66 		switch (dtt) {
67 		case CRDTE_DTT_REGION3:
68 			mask = ~(PTRS_PER_PUD * sizeof(pud_t) - 1);
69 			break;
70 		case CRDTE_DTT_SEGMENT:
71 			mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
72 			break;
73 		case CRDTE_DTT_PAGE:
74 			mask = ~(PTRS_PER_PTE * sizeof(pte_t) - 1);
75 			break;
76 		}
77 		table = (unsigned long *)((unsigned long)old & mask);
78 		crdte(*old, new, table, dtt, addr, S390_lowcore.kernel_asce);
79 	} else if (MACHINE_HAS_IDTE) {
80 		cspg(old, *old, new);
81 	} else {
82 		csp((unsigned int *)old + 1, *old, new);
83 	}
84 }
85 
/*
 * Apply @flags to every pte mapping [@addr, @end) below @pmdp.
 *
 * Flag precedence: RO over RW, NX over X, INV over DEF.
 *
 * Return: 0 on success, or when @flags is exactly SET_MEMORY_4K (nothing to
 * do at this level); -EINVAL as soon as an empty pte is found. Entries
 * visited before the empty one have already been updated at that point.
 */
static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end,
			  unsigned long flags)
{
	pte_t *ptep;
	pte_t entry;

	if (flags == SET_MEMORY_4K)
		return 0;

	ptep = pte_offset_kernel(pmdp, addr);
	for (;;) {
		entry = *ptep;
		if (pte_none(entry))
			return -EINVAL;

		/* write permission */
		if (flags & SET_MEMORY_RO)
			entry = pte_wrprotect(entry);
		else if (flags & SET_MEMORY_RW)
			entry = pte_mkwrite_novma(pte_mkdirty(entry));

		/* execute permission */
		if (flags & SET_MEMORY_NX)
			entry = set_pte_bit(entry, __pgprot(_PAGE_NOEXEC));
		else if (flags & SET_MEMORY_X)
			entry = clear_pte_bit(entry, __pgprot(_PAGE_NOEXEC));

		/* validity */
		if (flags & SET_MEMORY_INV) {
			entry = set_pte_bit(entry, __pgprot(_PAGE_INVALID));
		} else if (flags & SET_MEMORY_DEF) {
			/* keep only the PAGE_MASK bits, then apply PAGE_KERNEL */
			entry = __pte(pte_val(entry) & PAGE_MASK);
			entry = set_pte_bit(entry, PAGE_KERNEL);
			if (!MACHINE_HAS_NX)
				entry = clear_pte_bit(entry, __pgprot(_PAGE_NOEXEC));
		}

		pgt_set((unsigned long *)ptep, pte_val(entry), addr, CRDTE_DTT_PAGE);
		ptep++;
		addr += PAGE_SIZE;
		cond_resched();
		if (addr >= end)
			return 0;
	}
}
121 
/*
 * Replace the large segment entry at @pmdp by a freshly allocated page table
 * that maps the same memory with PTRS_PER_PTE individual ptes.
 *
 * The ptes start out as PAGE_KERNEL_RO if the segment entry has
 * _SEGMENT_ENTRY_PROTECT set and PAGE_KERNEL otherwise; _PAGE_NOEXEC is
 * dropped unless the segment entry has _SEGMENT_ENTRY_NOEXEC set.
 * The direct map counters are adjusted: +PTRS_PER_PTE 4K, -1 1M.
 *
 * Return: 0 on success, -ENOMEM if no page table could be allocated.
 */
static int split_pmd_page(pmd_t *pmdp, unsigned long addr)
{
	unsigned long base, prot;
	pte_t *table;
	pmd_t table_entry;
	int idx;

	table = vmem_pte_alloc();
	if (!table)
		return -ENOMEM;

	base = pmd_pfn(*pmdp) << PAGE_SHIFT;
	if (pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT)
		prot = pgprot_val(PAGE_KERNEL_RO);
	else
		prot = pgprot_val(PAGE_KERNEL);
	if (!(pmd_val(*pmdp) & _SEGMENT_ENTRY_NOEXEC))
		prot &= ~_PAGE_NOEXEC;

	/* Fill the new table before it becomes reachable through @pmdp. */
	for (idx = 0; idx < PTRS_PER_PTE; idx++)
		set_pte(&table[idx], __pte((base + idx * PAGE_SIZE) | prot));

	table_entry = __pmd(__pa(table) | _SEGMENT_ENTRY);
	pgt_set((unsigned long *)pmdp, pmd_val(table_entry), addr, CRDTE_DTT_SEGMENT);

	update_page_count(PG_DIRECT_MAP_4K, PTRS_PER_PTE);
	update_page_count(PG_DIRECT_MAP_1M, -1);
	return 0;
}
150 
/*
 * Apply @flags to the large segment entry at @pmdp, which maps @addr.
 *
 * Flag precedence: RO over RW, NX over X, INV over DEF. The updated entry
 * is installed through pgt_set().
 */
static void modify_pmd_page(pmd_t *pmdp, unsigned long addr,
			    unsigned long flags)
{
	pmd_t entry;

	entry = *pmdp;

	/* write permission */
	if (flags & SET_MEMORY_RO)
		entry = pmd_wrprotect(entry);
	else if (flags & SET_MEMORY_RW)
		entry = pmd_mkwrite_novma(pmd_mkdirty(entry));

	/* execute permission */
	if (flags & SET_MEMORY_NX)
		entry = set_pmd_bit(entry, __pgprot(_SEGMENT_ENTRY_NOEXEC));
	else if (flags & SET_MEMORY_X)
		entry = clear_pmd_bit(entry, __pgprot(_SEGMENT_ENTRY_NOEXEC));

	/* validity */
	if (flags & SET_MEMORY_INV) {
		entry = set_pmd_bit(entry, __pgprot(_SEGMENT_ENTRY_INVALID));
	} else if (flags & SET_MEMORY_DEF) {
		/* keep only the PMD_MASK bits, then apply SEGMENT_KERNEL */
		entry = __pmd(pmd_val(entry) & PMD_MASK);
		entry = set_pmd_bit(entry, SEGMENT_KERNEL);
		if (!MACHINE_HAS_NX)
			entry = clear_pmd_bit(entry, __pgprot(_SEGMENT_ENTRY_NOEXEC));
	}

	pgt_set((unsigned long *)pmdp, pmd_val(entry), addr, CRDTE_DTT_SEGMENT);
}
174 
/*
 * Apply @flags to all segment entries covering [@addr, @end) below @pudp.
 *
 * Entries that are not large are handed to walk_pte_level(). A large entry
 * is split into a page table first if SET_MEMORY_4K was requested or if the
 * range covers it only partially; otherwise it is modified in place.
 *
 * Return: 0 on success, -EINVAL for an empty entry, or the error returned
 * by split_pmd_page() / walk_pte_level().
 */
static int walk_pmd_level(pud_t *pudp, unsigned long addr, unsigned long end,
			  unsigned long flags)
{
	unsigned long next;
	pmd_t *pmdp;
	int rc;

	pmdp = pmd_offset(pudp, addr);
	do {
		if (pmd_none(*pmdp))
			return -EINVAL;
		next = pmd_addr_end(addr, end);
		if (!pmd_large(*pmdp)) {
			rc = walk_pte_level(pmdp, addr, next, flags);
			if (rc)
				return rc;
		} else if ((flags & SET_MEMORY_4K) ||
			   (addr & ~PMD_MASK) ||
			   (addr + PMD_SIZE > next)) {
			rc = split_pmd_page(pmdp, addr);
			if (rc)
				return rc;
			/*
			 * Neither pmdp nor addr advanced: the same entry,
			 * which split_pmd_page() replaced with a page table
			 * pointer, is looked at again.
			 */
			continue;
		} else {
			modify_pmd_page(pmdp, addr, flags);
		}
		pmdp++;
		addr = next;
		cond_resched();
	} while (addr < end);
	return 0;
}
210 
/*
 * Replace the large region-third entry at @pudp by a freshly allocated
 * segment table that maps the same memory with PTRS_PER_PMD large segment
 * entries.
 *
 * The segment entries start out as SEGMENT_KERNEL_RO if the region entry
 * has _REGION_ENTRY_PROTECT set and SEGMENT_KERNEL otherwise;
 * _SEGMENT_ENTRY_NOEXEC is dropped unless the region entry has
 * _REGION_ENTRY_NOEXEC set.
 * The direct map counters are adjusted: +PTRS_PER_PMD 1M, -1 2G.
 *
 * Return: 0 on success, -ENOMEM if no table could be allocated.
 */
static int split_pud_page(pud_t *pudp, unsigned long addr)
{
	unsigned long base, prot;
	pmd_t *table;
	pud_t table_entry;
	int idx;

	table = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
	if (!table)
		return -ENOMEM;

	base = pud_pfn(*pudp) << PAGE_SHIFT;
	if (pud_val(*pudp) & _REGION_ENTRY_PROTECT)
		prot = pgprot_val(SEGMENT_KERNEL_RO);
	else
		prot = pgprot_val(SEGMENT_KERNEL);
	if (!(pud_val(*pudp) & _REGION_ENTRY_NOEXEC))
		prot &= ~_SEGMENT_ENTRY_NOEXEC;

	/* Fill the new table before it becomes reachable through @pudp. */
	for (idx = 0; idx < PTRS_PER_PMD; idx++)
		set_pmd(&table[idx], __pmd((base + idx * PMD_SIZE) | prot));

	table_entry = __pud(__pa(table) | _REGION3_ENTRY);
	pgt_set((unsigned long *)pudp, pud_val(table_entry), addr, CRDTE_DTT_REGION3);

	update_page_count(PG_DIRECT_MAP_1M, PTRS_PER_PMD);
	update_page_count(PG_DIRECT_MAP_2G, -1);
	return 0;
}
239 
/*
 * Apply @flags to the large region-third entry at @pudp, which maps @addr.
 *
 * Flag precedence: RO over RW, NX over X, INV over DEF. The updated entry
 * is installed through pgt_set().
 */
static void modify_pud_page(pud_t *pudp, unsigned long addr,
			    unsigned long flags)
{
	pud_t entry;

	entry = *pudp;

	/* write permission */
	if (flags & SET_MEMORY_RO)
		entry = pud_wrprotect(entry);
	else if (flags & SET_MEMORY_RW)
		entry = pud_mkwrite(pud_mkdirty(entry));

	/* execute permission */
	if (flags & SET_MEMORY_NX)
		entry = set_pud_bit(entry, __pgprot(_REGION_ENTRY_NOEXEC));
	else if (flags & SET_MEMORY_X)
		entry = clear_pud_bit(entry, __pgprot(_REGION_ENTRY_NOEXEC));

	/* validity */
	if (flags & SET_MEMORY_INV) {
		entry = set_pud_bit(entry, __pgprot(_REGION_ENTRY_INVALID));
	} else if (flags & SET_MEMORY_DEF) {
		/* keep only the PUD_MASK bits, then apply REGION3_KERNEL */
		entry = __pud(pud_val(entry) & PUD_MASK);
		entry = set_pud_bit(entry, REGION3_KERNEL);
		if (!MACHINE_HAS_NX)
			entry = clear_pud_bit(entry, __pgprot(_REGION_ENTRY_NOEXEC));
	}

	pgt_set((unsigned long *)pudp, pud_val(entry), addr, CRDTE_DTT_REGION3);
}
263 
/*
 * Apply @flags to all region-third entries covering [@addr, @end) below
 * @p4d.
 *
 * Entries that are not large are handed to walk_pmd_level(). A large entry
 * is split into a segment table first if SET_MEMORY_4K was requested or if
 * the range covers it only partially; otherwise it is modified in place.
 *
 * Return: 0 on success, -EINVAL for an empty entry, or the error returned
 * by split_pud_page() / walk_pmd_level().
 */
static int walk_pud_level(p4d_t *p4d, unsigned long addr, unsigned long end,
			  unsigned long flags)
{
	unsigned long next;
	pud_t *pudp;
	int rc = 0;

	pudp = pud_offset(p4d, addr);
	do {
		if (pud_none(*pudp))
			return -EINVAL;
		next = pud_addr_end(addr, end);
		if (!pud_large(*pudp)) {
			rc = walk_pmd_level(pudp, addr, next, flags);
		} else if ((flags & SET_MEMORY_4K) ||
			   (addr & ~PUD_MASK) ||
			   (addr + PUD_SIZE > next)) {
			rc = split_pud_page(pudp, addr);
			if (rc)
				return rc;
			/*
			 * Neither pudp nor addr advanced: the same entry,
			 * which split_pud_page() replaced with a segment
			 * table pointer, is looked at again.
			 */
			continue;
		} else {
			modify_pud_page(pudp, addr, flags);
		}
		pudp++;
		addr = next;
		cond_resched();
	} while (!rc && addr < end);
	return rc;
}
297 
/*
 * Apply @flags to [@addr, @end) by running walk_pud_level() on every p4d
 * entry of @pgd that covers part of the range.
 *
 * Return: 0 on success, -EINVAL for an empty p4d entry, or the first error
 * returned by walk_pud_level().
 */
static int walk_p4d_level(pgd_t *pgd, unsigned long addr, unsigned long end,
			  unsigned long flags)
{
	p4d_t *p4dp = p4d_offset(pgd, addr);
	unsigned long next;
	int rc = 0;

	do {
		if (p4d_none(*p4dp))
			return -EINVAL;
		next = p4d_addr_end(addr, end);
		rc = walk_pud_level(p4dp, addr, next, flags);
		p4dp++;
		addr = next;
		cond_resched();
	} while (!rc && addr < end);

	return rc;
}
317 
318 DEFINE_MUTEX(cpa_mutex);
319 
change_page_attr(unsigned long addr,unsigned long end,unsigned long flags)320 static int change_page_attr(unsigned long addr, unsigned long end,
321 			    unsigned long flags)
322 {
323 	unsigned long next;
324 	int rc = -EINVAL;
325 	pgd_t *pgdp;
326 
327 	pgdp = pgd_offset_k(addr);
328 	do {
329 		if (pgd_none(*pgdp))
330 			break;
331 		next = pgd_addr_end(addr, end);
332 		rc = walk_p4d_level(pgdp, addr, next, flags);
333 		if (rc)
334 			break;
335 		cond_resched();
336 	} while (pgdp++, addr = next, addr < end && !rc);
337 	return rc;
338 }
339 
change_page_attr_alias(unsigned long addr,unsigned long end,unsigned long flags)340 static int change_page_attr_alias(unsigned long addr, unsigned long end,
341 				  unsigned long flags)
342 {
343 	unsigned long alias, offset, va_start, va_end;
344 	struct vm_struct *area;
345 	int rc = 0;
346 
347 	/*
348 	 * Changes to read-only permissions on kernel VA mappings are also
349 	 * applied to the kernel direct mapping. Execute permissions are
350 	 * intentionally not transferred to keep all allocated pages within
351 	 * the direct mapping non-executable.
352 	 */
353 	flags &= SET_MEMORY_RO | SET_MEMORY_RW;
354 	if (!flags)
355 		return 0;
356 	area = NULL;
357 	while (addr < end) {
358 		if (!area)
359 			area = find_vm_area((void *)addr);
360 		if (!area || !(area->flags & VM_ALLOC))
361 			return 0;
362 		va_start = (unsigned long)area->addr;
363 		va_end = va_start + area->nr_pages * PAGE_SIZE;
364 		offset = (addr - va_start) >> PAGE_SHIFT;
365 		alias = (unsigned long)page_address(area->pages[offset]);
366 		rc = change_page_attr(alias, alias + PAGE_SIZE, flags);
367 		if (rc)
368 			break;
369 		addr += PAGE_SIZE;
370 		if (addr >= va_end)
371 			area = NULL;
372 	}
373 	return rc;
374 }
375 
__set_memory(unsigned long addr,unsigned long numpages,unsigned long flags)376 int __set_memory(unsigned long addr, unsigned long numpages, unsigned long flags)
377 {
378 	unsigned long end;
379 	int rc;
380 
381 	if (!MACHINE_HAS_NX)
382 		flags &= ~(SET_MEMORY_NX | SET_MEMORY_X);
383 	if (!flags)
384 		return 0;
385 	if (!numpages)
386 		return 0;
387 	addr &= PAGE_MASK;
388 	end = addr + numpages * PAGE_SIZE;
389 	mutex_lock(&cpa_mutex);
390 	rc = change_page_attr(addr, end, flags);
391 	if (rc)
392 		goto out;
393 	rc = change_page_attr_alias(addr, end, flags);
394 out:
395 	mutex_unlock(&cpa_mutex);
396 	return rc;
397 }
398 
set_direct_map_invalid_noflush(struct page * page)399 int set_direct_map_invalid_noflush(struct page *page)
400 {
401 	return __set_memory((unsigned long)page_to_virt(page), 1, SET_MEMORY_INV);
402 }
403 
set_direct_map_default_noflush(struct page * page)404 int set_direct_map_default_noflush(struct page *page)
405 {
406 	return __set_memory((unsigned long)page_to_virt(page), 1, SET_MEMORY_DEF);
407 }
408 
409 #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
410 
/*
 * Run the ipte primitive on @nr consecutive ptes, starting with @pte, which
 * maps @address.
 *
 * If facility 13 is installed, a single __ptep_ipte_range() call is issued
 * for the whole run (it is passed @nr - 1). Otherwise __ptep_ipte() is
 * called once per pte.
 */
static void ipte_range(pte_t *pte, unsigned long address, int nr)
{
	int left;

	if (test_facility(13)) {
		__ptep_ipte_range(address, nr - 1, pte, IPTE_GLOBAL);
		return;
	}

	for (left = nr; left > 0; left--) {
		__ptep_ipte(address, pte, 0, 0, IPTE_GLOBAL);
		pte++;
		address += PAGE_SIZE;
	}
}
425 
__kernel_map_pages(struct page * page,int numpages,int enable)426 void __kernel_map_pages(struct page *page, int numpages, int enable)
427 {
428 	unsigned long address;
429 	pte_t *ptep, pte;
430 	int nr, i, j;
431 
432 	for (i = 0; i < numpages;) {
433 		address = (unsigned long)page_to_virt(page + i);
434 		ptep = virt_to_kpte(address);
435 		nr = (unsigned long)ptep >> ilog2(sizeof(long));
436 		nr = PTRS_PER_PTE - (nr & (PTRS_PER_PTE - 1));
437 		nr = min(numpages - i, nr);
438 		if (enable) {
439 			for (j = 0; j < nr; j++) {
440 				pte = clear_pte_bit(*ptep, __pgprot(_PAGE_INVALID));
441 				set_pte(ptep, pte);
442 				address += PAGE_SIZE;
443 				ptep++;
444 			}
445 		} else {
446 			ipte_range(ptep, address, nr);
447 		}
448 		i += nr;
449 	}
450 }
451 
452 #endif /* CONFIG_DEBUG_PAGEALLOC */
453