/*
 * IA-64 Huge TLB Page Support for Kernel.
 *
 * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
 */

#include <linux/config.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/smp_lock.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <asm/mman.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>


#define TASK_HPAGE_BASE (REGION_HPAGE << REGION_SHIFT)

static long    htlbpagemem;
int     htlbpage_max;
static long    htlbzone_pages;

struct vm_operations_struct hugetlb_vm_ops;
static LIST_HEAD(htlbpage_freelist);
static spinlock_t htlbpage_lock = SPIN_LOCK_UNLOCKED;

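/*
 * Take a huge page off the free list and zero it.  Returns NULL if the
 * pool is empty.
 */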
static struct page *alloc_hugetlb_page(void)
{
	int i;
	struct page *page;

	spin_lock(&htlbpage_lock);
	if (list_empty(&htlbpage_freelist)) {
		spin_unlock(&htlbpage_lock);
		return NULL;
	}

	page = list_entry(htlbpage_freelist.next, struct page, list);
	list_del(&page->list);
	htlbpagemem--;
	spin_unlock(&htlbpage_lock);
	set_page_count(page, 1);
	for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); ++i)
		clear_highpage(&page[i]);
	return page;
}

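/*
 * Return the PTE slot that maps the huge page covering @addr,
 * allocating intermediate page-table levels as needed.  The address is
 * first rebased out of the huge-page region via htlbpage_to_page().
 */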
static pte_t *
huge_pte_alloc (struct mm_struct *mm, unsigned long addr)
{
	unsigned long taddr = htlbpage_to_page(addr);
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte = NULL;

	pgd = pgd_offset(mm, taddr);
	pmd = pmd_alloc(mm, pgd, taddr);
	if (pmd)
		pte = pte_alloc(mm, pmd, taddr);
	return pte;
}

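/*
 * Like huge_pte_alloc(), but only walks page tables that already
 * exist; returns NULL when an intermediate level is missing.
 */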
static pte_t *
huge_pte_offset (struct mm_struct *mm, unsigned long addr)
{
	unsigned long taddr = htlbpage_to_page(addr);
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte = NULL;

	pgd = pgd_offset(mm, taddr);
	if (pgd_present(*pgd)) {
		pmd = pmd_offset(pgd, taddr);
		if (pmd_present(*pmd))
			pte = pte_offset(pmd, taddr);
	}

	return pte;
}

#define mk_pte_huge(entry) { pte_val(entry) |= _PAGE_P; }

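/*
 * Construct a huge PTE for @page (writable only if @write_access),
 * mark it present with mk_pte_huge() and install it, accounting the
 * huge page in mm->rss.
 */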
static void
set_huge_pte (struct mm_struct *mm, struct vm_area_struct *vma,
	      struct page *page, pte_t * page_table, int write_access)
{
	pte_t entry;

	mm->rss += (HPAGE_SIZE / PAGE_SIZE);
	if (write_access) {
		entry =
		    pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
	} else
		entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
	entry = pte_mkyoung(entry);
	mk_pte_huge(entry);
	set_pte(page_table, entry);
	return;
}

/*
 * Check that @addr and @len are huge-page aligned and that @addr lies
 * within the huge-page region.  Returns 0 on success, -EINVAL otherwise.
 */
int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
{
	if (len & ~HPAGE_MASK)
		return -EINVAL;
	if (addr & ~HPAGE_MASK)
		return -EINVAL;
	if (REGION_NUMBER(addr) != REGION_HPAGE)
		return -EINVAL;

	return 0;
}

/*
 * Check that the range [addr, addr+len) stays outside the HugeTLB
 * region.  Returns -EINVAL if either end of the range falls inside it.
 */
int is_invalid_hugepage_range(unsigned long addr, unsigned long len)
{
	if (REGION_NUMBER(addr) == REGION_HPAGE)
		return -EINVAL;
	if (REGION_NUMBER(addr+len) == REGION_HPAGE)
		return -EINVAL;
	return 0;
}

/*
 * Same as generic free_pgtables(), except constant PGDIR_* and pgd_offset
 * are hugetlb region specific.
 */
void hugetlb_free_pgtables(struct mm_struct * mm, struct vm_area_struct *prev,
	unsigned long start, unsigned long end)
{
	unsigned long first = start & HUGETLB_PGDIR_MASK;
	unsigned long last = end + HUGETLB_PGDIR_SIZE - 1;
	unsigned long start_index, end_index;

	if (!prev) {
		prev = mm->mmap;
		if (!prev)
			goto no_mmaps;
		if (prev->vm_end > start) {
			if (last > prev->vm_start)
				last = prev->vm_start;
			goto no_mmaps;
		}
	}
	for (;;) {
		struct vm_area_struct *next = prev->vm_next;

		if (next) {
			if (next->vm_start < start) {
				prev = next;
				continue;
			}
			if (last > next->vm_start)
				last = next->vm_start;
		}
		if (prev->vm_end > first)
			first = prev->vm_end + HUGETLB_PGDIR_SIZE - 1;
		break;
	}
no_mmaps:
	if (last < first)
		return;
	/*
	 * If the PGD bits are not consecutive in the virtual address, the
	 * old method of shifting the VA >> by PGDIR_SHIFT doesn't work.
	 */
	start_index = pgd_index(htlbpage_to_page(first));
	end_index = pgd_index(htlbpage_to_page(last));
	if (end_index > start_index) {
		clear_page_tables(mm, start_index, end_index - start_index);
		flush_tlb_pgtables(mm, first & HUGETLB_PGDIR_MASK,
				   last & HUGETLB_PGDIR_MASK);
	}
}

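/*
 * Duplicate the huge-page mappings of @vma from @src into @dst: each
 * mapped huge page gains a reference and its PTE is copied, so both
 * mms share the same pages.  Returns -ENOMEM if a destination PTE
 * cannot be allocated.
 */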
int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
			struct vm_area_struct *vma)
{
	pte_t *src_pte, *dst_pte, entry;
	struct page *ptepage;
	unsigned long addr = vma->vm_start;
	unsigned long end = vma->vm_end;

	while (addr < end) {
		dst_pte = huge_pte_alloc(dst, addr);
		if (!dst_pte)
			goto nomem;
		src_pte = huge_pte_offset(src, addr);
		entry = *src_pte;
		ptepage = pte_page(entry);
		get_page(ptepage);
		set_pte(dst_pte, entry);
		dst->rss += (HPAGE_SIZE / PAGE_SIZE);
		addr += HPAGE_SIZE;
	}
	return 0;
nomem:
	return -ENOMEM;
}

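/*
 * Resolve the user pages backing a huge mapping: starting at *st, fill
 * @pages and/or @vmas one PAGE_SIZE step at a time and return the
 * updated index @i.  *st and *length are advanced to record the
 * progress made.
 */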
int
follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
		    struct page **pages, struct vm_area_struct **vmas,
		    unsigned long *st, int *length, int i)
{
	pte_t *ptep, pte;
	unsigned long start = *st;
	unsigned long pstart;
	int len = *length;
	struct page *page;

	do {
		pstart = start;
		ptep = huge_pte_offset(mm, start);
		pte = *ptep;

back1:
		page = pte_page(pte);
		if (pages) {
			page += ((start & ~HPAGE_MASK) >> PAGE_SHIFT);
			pages[i] = page;
		}
		if (vmas)
			vmas[i] = vma;
		i++;
		len--;
		start += PAGE_SIZE;
		if (((start & HPAGE_MASK) == pstart) && len &&
				(start < vma->vm_end))
			goto back1;
	} while (len && start < vma->vm_end);
	*length = len;
	*st = start;
	return i;
}

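/*
 * Put a no-longer-referenced huge page back on the free list.
 */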
void free_huge_page(struct page *page)
{
	BUG_ON(page_count(page));
	BUG_ON(page->mapping);

	INIT_LIST_HEAD(&page->list);

	spin_lock(&htlbpage_lock);
	list_add(&page->list, &htlbpage_freelist);
	htlbpagemem++;
	spin_unlock(&htlbpage_lock);
}

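/*
 * Drop one reference; when the count hits zero the page returns to the
 * huge page free list.
 */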
void huge_page_release(struct page *page)
{
	if (!put_page_testzero(page))
		return;

	free_huge_page(page);
}

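/*
 * Tear down the huge mappings in [start, end): release each mapped
 * page, clear its PTE, adjust mm->rss and flush the TLB.  Both
 * boundaries must be huge-page aligned.
 */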
void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long address;
	pte_t *pte;
	struct page *page;

	BUG_ON(start & (HPAGE_SIZE - 1));
	BUG_ON(end & (HPAGE_SIZE - 1));

	for (address = start; address < end; address += HPAGE_SIZE) {
		pte = huge_pte_offset(mm, address);
		if (!pte || pte_none(*pte))
			continue;
		page = pte_page(*pte);
		huge_page_release(page);
		pte_clear(pte);
	}
	mm->rss -= (end - start) >> PAGE_SHIFT;
	flush_tlb_range(mm, start, end);
}

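/*
 * Wrapper that takes mm->page_table_lock around unmap_hugepage_range().
 */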
void zap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigned long length)
{
	struct mm_struct *mm = vma->vm_mm;
	spin_lock(&mm->page_table_lock);
	unmap_hugepage_range(vma, start, start + length);
	spin_unlock(&mm->page_table_lock);
}

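/*
 * Instantiate every huge page of the mapping up front: for each huge
 * page in the VMA, look it up in @mapping's page cache or allocate a
 * fresh one (charging the filesystem quota first), then install its
 * PTE.
 */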
int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
{
	struct mm_struct *mm = current->mm;
	struct inode *inode = mapping->host;
	unsigned long addr;
	int ret = 0;

	BUG_ON(vma->vm_start & ~HPAGE_MASK);
	BUG_ON(vma->vm_end & ~HPAGE_MASK);

	spin_lock(&mm->page_table_lock);
	for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
		unsigned long idx;
		pte_t *pte = huge_pte_alloc(mm, addr);
		struct page *page;

		if (!pte) {
			ret = -ENOMEM;
			goto out;
		}
		if (!pte_none(*pte))
			continue;

		idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
			+ (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
		page = find_get_page(mapping, idx);
		if (!page) {
			/* charge the fs quota first */
			if (hugetlb_get_quota(mapping)) {
				ret = -ENOMEM;
				goto out;
			}
			page = alloc_hugetlb_page();
			if (!page) {
				hugetlb_put_quota(mapping);
				ret = -ENOMEM;
				goto out;
			}
			add_to_page_cache(page, mapping, idx);
			unlock_page(page);
		}
		set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE);
	}
out:
	spin_unlock(&mm->page_table_lock);
	return ret;
}

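/*
 * Find a free, huge-page-aligned range of @len bytes inside the
 * dedicated huge-page region (REGION_HPAGE).
 */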
unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
		unsigned long pgoff, unsigned long flags)
{
	struct vm_area_struct *vmm;

	if (len > RGN_MAP_LIMIT)
		return -ENOMEM;
	if (len & ~HPAGE_MASK)
		return -EINVAL;
	/* This code assumes that REGION_HPAGE != 0. */
	if ((REGION_NUMBER(addr) != REGION_HPAGE) || (addr & (HPAGE_SIZE - 1)))
		addr = TASK_HPAGE_BASE;
	else
		addr = COLOR_HALIGN(addr);
	for (vmm = find_vma(current->mm, addr); ; vmm = vmm->vm_next) {
		/* At this point:  (!vmm || addr < vmm->vm_end). */
		if (REGION_OFFSET(addr) + len > RGN_MAP_LIMIT)
			return -ENOMEM;
		if (!vmm || (addr + len) <= vmm->vm_start)
			return addr;
		addr = COLOR_HALIGN(vmm->vm_end);
	}
}
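
/*
 * Hand a huge page back to the buddy allocator and drop it from the
 * pool accounting.  Callers hold htlbpage_lock.
 */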
void update_and_free_page(struct page *page)
{
	int j;
	struct page *map;

	map = page;
	htlbzone_pages--;
	for (j = 0; j < (HPAGE_SIZE / PAGE_SIZE); j++) {
		map->flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced |
				1 << PG_dirty | 1 << PG_active | 1 << PG_reserved);
		set_page_count(map, 0);
		map++;
	}
	set_page_count(page, 1);
	__free_pages(page, HUGETLB_PAGE_ORDER);
}

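/*
 * Shrink the pool by preferentially freeing huge pages that live in
 * non-highmem zones.  @count is the (negative) number of pages still
 * to be freed; the remaining deficit, if any, is returned.
 */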
int try_to_free_low(int count)
{
	struct list_head *p;
	struct page *page, *map;

	map = NULL;
	spin_lock(&htlbpage_lock);
	list_for_each(p, &htlbpage_freelist) {
		if (map) {
			list_del(&map->list);
			update_and_free_page(map);
			htlbpagemem--;
			map = NULL;
			if (++count == 0)
				break;
		}
		page = list_entry(p, struct page, list);
		if ((page_zone(page))->name[0] != 'H')	/* look for non-HighMem zones */
			map = page;
	}
	if (map) {
		list_del(&map->list);
		update_and_free_page(map);
		htlbpagemem--;
		count++;
	}
	spin_unlock(&htlbpage_lock);
	return count;
}

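/*
 * Resize the huge page pool: a non-negative @count is the desired pool
 * size, a negative @count shrinks the pool by that many pages.
 * Returns the resulting number of huge pages.
 */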
int set_hugetlb_mem_size(int count)
{
	int j, lcount;
	struct page *page, *map;

	if (count < 0)
		lcount = count;
	else
		lcount = count - htlbzone_pages;

	if (lcount == 0)
		return (int)htlbzone_pages;
	if (lcount > 0) {	/* Increase the mem size. */
		while (lcount--) {
			page = alloc_pages(__GFP_HIGHMEM, HUGETLB_PAGE_ORDER);
			if (page == NULL)
				break;
			map = page;
			for (j = 0; j < (HPAGE_SIZE / PAGE_SIZE); j++) {
				SetPageReserved(map);
				map++;
			}
			spin_lock(&htlbpage_lock);
			list_add(&page->list, &htlbpage_freelist);
			htlbpagemem++;
			htlbzone_pages++;
			spin_unlock(&htlbpage_lock);
		}
		return (int) htlbzone_pages;
	}
	/* Shrink the memory size. */
	lcount = try_to_free_low(lcount);
	while (lcount++ < 0) {
		page = alloc_hugetlb_page();
		if (page == NULL)
			break;
		spin_lock(&htlbpage_lock);
		update_and_free_page(page);
		spin_unlock(&htlbpage_lock);
	}
	return (int) htlbzone_pages;
}

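/*
 * sysctl handler: pick up the value written to htlbpage_max and resize
 * the pool to match.
 */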
int hugetlb_sysctl_handler(ctl_table *table, int write, struct file *file, void *buffer, size_t *length)
{
	proc_dointvec(table, write, file, buffer, length);
	htlbpage_max = set_hugetlb_mem_size(htlbpage_max);
	return 0;
}

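/*
 * Parse the "hugepages=N" boot parameter; e.g. booting with
 * "hugepages=64" makes hugetlb_init() reserve 64 huge pages.
 */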
static int __init hugetlb_setup(char *s)
{
	if (sscanf(s, "%d", &htlbpage_max) <= 0)
		htlbpage_max = 0;
	return 1;
}
__setup("hugepages=", hugetlb_setup);

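/*
 * Boot-time setup: allocate up to htlbpage_max huge pages, mark their
 * constituent pages reserved and put them on the free list.
 */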
static int __init hugetlb_init(void)
{
	int i, j;
	struct page *page;

	for (i = 0; i < htlbpage_max; ++i) {
		page = alloc_pages(__GFP_HIGHMEM, HUGETLB_PAGE_ORDER);
		if (!page)
			break;
		for (j = 0; j < HPAGE_SIZE/PAGE_SIZE; ++j)
			SetPageReserved(&page[j]);
		spin_lock(&htlbpage_lock);
		list_add(&page->list, &htlbpage_freelist);
		spin_unlock(&htlbpage_lock);
	}
	htlbpage_max = htlbpagemem = htlbzone_pages = i;
	printk("Total HugeTLB memory allocated, %ld\n", htlbpagemem);
	return 0;
}
module_init(hugetlb_init);

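/*
 * Emit the HugePages_* lines shown in /proc/meminfo.
 */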
int hugetlb_report_meminfo(char *buf)
{
	return sprintf(buf,
			"HugePages_Total: %5lu\n"
			"HugePages_Free:  %5lu\n"
			"Hugepagesize:    %5lu kB\n",
			htlbzone_pages,
			htlbpagemem,
			HPAGE_SIZE/1024);
}

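/*
 * Return 1 if at least @size bytes worth of huge pages are free.
 */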
int is_hugepage_mem_enough(size_t size)
{
	if (size > (htlbpagemem << HPAGE_SHIFT))
		return 0;
	return 1;
}

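/*
 * Huge mappings are fully instantiated by hugetlb_prefault(), so the
 * nopage handler should never be reached.
 */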
static struct page *hugetlb_nopage(struct vm_area_struct * area, unsigned long address, int unused)
{
	BUG();
	return NULL;
}

struct vm_operations_struct hugetlb_vm_ops = {
	.nopage =	hugetlb_nopage,
};