/* x86-64 MTRR (Memory Type Range Register) driver. Based largely upon arch/i386/kernel/mtrr.c Copyright (C) 1997-2000 Richard Gooch Copyright (C) 2002 Dave Jones. This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. (For earlier history, see arch/i386/kernel/mtrr.c) v2.00 September 2001 Dave Jones Initial rewrite for x86-64. Removal of non-Intel style MTRR code. v2.01 June 2002 Dave Jones Removal of redundant abstraction layer. 64-bit fixes. v2.02 July 2002 Dave Jones Fix gentry inconsistencies between kernel/userspace. More casts to clean up warnings. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define MTRR_NEED_STRINGS #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define MTRR_VERSION "2.02 (20020716)" #define MTRR_BEG_BIT 12 #define MTRR_END_BIT 7 #undef Dprintk #define Dprintk(...) 
#define TRUE 1 #define FALSE 0 #define MSR_MTRRphysBase(reg) (0x200 + 2 * (reg)) #define MSR_MTRRphysMask(reg) (0x200 + 2 * (reg) + 1) #define NUM_FIXED_RANGES 88 #define MTRR_CHANGE_MASK_FIXED 0x01 #define MTRR_CHANGE_MASK_VARIABLE 0x02 #define MTRR_CHANGE_MASK_DEFTYPE 0x04 typedef u8 mtrr_type; #define LINE_SIZE 80 #ifdef CONFIG_SMP #define set_mtrr(reg,base,size,type) set_mtrr_smp (reg, base, size, type) #else #define set_mtrr(reg,base,size,type) set_mtrr_up (reg, base, size, type, TRUE) #endif #if defined(CONFIG_PROC_FS) || defined(CONFIG_DEVFS_FS) #define USERSPACE_INTERFACE #endif #ifdef USERSPACE_INTERFACE static char *ascii_buffer; static unsigned int ascii_buf_bytes; static void compute_ascii (void); #else #define compute_ascii() while (0) #endif static unsigned int *usage_table; static DECLARE_MUTEX (mtrr_lock); struct set_mtrr_context { u32 deftype_lo; u32 deftype_hi; unsigned long flags; u64 cr4val; }; /* Put the processor into a state where MTRRs can be safely set */ static void set_mtrr_prepare (struct set_mtrr_context *ctxt) { u64 cr0; /* Disable interrupts locally */ __save_flags(ctxt->flags); __cli(); /* Save value of CR4 and clear Page Global Enable (bit 7) */ if (test_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability)) { ctxt->cr4val = read_cr4(); write_cr4(ctxt->cr4val & ~(1UL << 7)); } /* Disable and flush caches. 
Note that wbinvd flushes the TLBs as a side-effect */ cr0 = read_cr0() | 0x40000000; wbinvd(); write_cr0(cr0); wbinvd(); rdmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi); } static void set_mtrr_disable(struct set_mtrr_context *ctxt) { /* Disable MTRRs, and set the default type to uncached */ wrmsr(MSR_MTRRdefType, ctxt->deftype_lo & 0xf300UL, ctxt->deftype_hi); } /* Restore the processor after a set_mtrr_prepare */ static void set_mtrr_done (struct set_mtrr_context *ctxt) { /* Flush caches and TLBs */ wbinvd(); /* Restore MTRRdefType */ wrmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi); /* Enable caches */ write_cr0(read_cr0() & 0xbfffffff); /* Restore value of CR4 */ if (test_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability)) write_cr4 (ctxt->cr4val); /* Re-enable interrupts locally (if enabled previously) */ __restore_flags(ctxt->flags); } /* This function returns the number of variable MTRRs */ static unsigned int get_num_var_ranges (void) { u32 config, dummy; rdmsr (MSR_MTRRcap, config, dummy); return (config & 0xff); } /* Returns non-zero if we have the write-combining memory type */ static int have_wrcomb (void) { u32 config, dummy; rdmsr (MSR_MTRRcap, config, dummy); return (config & (1 << 10)); } static u64 size_or_mask, size_and_mask; static void get_mtrr (unsigned int reg, u64 *base, u32 *size, mtrr_type * type) { u32 mask_lo, mask_hi; u32 base_lo, base_hi; rdmsr (MSR_MTRRphysMask(reg), mask_lo, mask_hi); if ((mask_lo & 0x800) == 0) { /* Invalid (i.e. free) range */ *base = 0; *size = 0; *type = 0; return; } rdmsr (MSR_MTRRphysBase(reg), base_lo, base_hi); /* Work out the shifted address mask */ mask_lo = size_or_mask | mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT; /* This works correctly if size is a power of two, i.e. a continguous range. */ *size = -mask_lo; *base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT; *type = base_lo & 0xff; } /* * Set variable MTRR register on the local CPU. * The register to set. 
* The base address of the region.
 * The size of the region. If this is 0 the region is disabled.
 * The type of the region.
 * If TRUE, do the change safely. If FALSE, safety measures should
 * be done externally.
 *
 * The lines above describe the arguments in order: reg, base, size,
 * type, do_safe. base and size are shifted left by PAGE_SHIFT before
 * being written, i.e. they are given in pages.
 */
static void set_mtrr_up (unsigned int reg, u64 base, u32 size, mtrr_type type, int do_safe)
{
	struct set_mtrr_context ctxt;

	/* Safe path: prepare the CPU and switch the MTRRs off first */
	if (do_safe) {
		set_mtrr_prepare (&ctxt);
		set_mtrr_disable (&ctxt);
	}

	if (size == 0) {
		/* The invalid bit is kept in the mask, so we simply clear the
		   relevant mask register to disable a range. */
		wrmsr (MSR_MTRRphysMask(reg), 0, 0);
	} else {
		/* Base register: address plus type in the low byte */
		wrmsr (MSR_MTRRphysBase(reg), base << PAGE_SHIFT | type, (base & size_and_mask) >> (32 - PAGE_SHIFT));
		/* Mask register: -size gives the address mask for a
		   power-of-two size; 0x800 is the bit get_mtrr() tests to
		   tell a used range from a free one */
		wrmsr(MSR_MTRRphysMask(reg), -size << PAGE_SHIFT | 0x800, (-size & size_and_mask) >> (32 - PAGE_SHIFT));
	}

	if (do_safe)
		set_mtrr_done (&ctxt);
}

#ifdef CONFIG_SMP

/* Raw contents of one variable range's base/mask MSR pair */
struct mtrr_var_range {
	u32 base_lo;
	u32 base_hi;
	u32 mask_lo;
	u32 mask_hi;
};

/* Get the MSR pair relating to a var range */
static void __init get_mtrr_var_range (unsigned int index, struct mtrr_var_range *vr)
{
	rdmsr (MSR_MTRRphysBase(index), vr->base_lo, vr->base_hi);
	rdmsr (MSR_MTRRphysMask(index), vr->mask_lo, vr->mask_hi);
}

/* Set the MSR pair relating to a var range.
Returns TRUE if changes are made */
static int __init set_mtrr_var_range_testing (unsigned int index, struct mtrr_var_range *vr)
{
	u32 lo, hi;
	int changed = FALSE;

	/* Only rewrite an MSR when the masked bits differ from *vr */
	rdmsr (MSR_MTRRphysBase(index), lo, hi);
	if ((vr->base_lo & 0xfffff0ff) != (lo & 0xfffff0ff) || (vr->base_hi & 0x000fffff) != (hi & 0x000fffff)) {
		wrmsr (MSR_MTRRphysBase(index), vr->base_lo, vr->base_hi);
		changed = TRUE;
	}

	rdmsr (MSR_MTRRphysMask(index), lo, hi);
	if ((vr->mask_lo & 0xfffff800) != (lo & 0xfffff800) || (vr->mask_hi & 0x000fffff) != (hi & 0x000fffff)) {
		wrmsr (MSR_MTRRphysMask(index), vr->mask_lo, vr->mask_hi);
		changed = TRUE;
	}
	return changed;
}

/*
 * Read all fixed-range MSRs into frs, viewed as an array of 32-bit words:
 * words 0-1 from the 64K MSR, words 2-5 from the two 16K MSRs and
 * words 6-21 from the eight 4K MSRs.
 */
static void __init get_fixed_ranges (mtrr_type * frs)
{
	u32 *p = (u32 *) frs;
	int i;

	rdmsr (MSR_MTRRfix64K_00000, p[0], p[1]);

	for (i = 0; i < 2; i++)
		rdmsr (MSR_MTRRfix16K_80000 + i, p[2 + i * 2], p[3 + i * 2]);
	for (i = 0; i < 8; i++)
		rdmsr (MSR_MTRRfix4K_C0000 + i, p[6 + i * 2], p[7 + i * 2]);
}

/*
 * Make this CPU's fixed-range MSRs match frs (same word layout as
 * get_fixed_ranges()), writing only the MSRs that differ.
 * Returns TRUE if any MSR was written.
 */
static int __init set_fixed_ranges_testing (mtrr_type * frs)
{
	u32 *p = (u32 *) frs;
	int changed = FALSE;
	int i;
	u32 lo, hi;

	Dprintk (KERN_INFO "mtrr: rdmsr 64K_00000\n");
	rdmsr (MSR_MTRRfix64K_00000, lo, hi);
	if (p[0] != lo || p[1] != hi) {
		Dprintk (KERN_INFO "mtrr: Writing %x:%x to 64K MSR. lohi were %x:%x\n", p[0], p[1], lo, hi);
		wrmsr (MSR_MTRRfix64K_00000, p[0], p[1]);
		changed = TRUE;
	}

	Dprintk (KERN_INFO "mtrr: rdmsr 16K_80000\n");
	for (i = 0; i < 2; i++) {
		rdmsr (MSR_MTRRfix16K_80000 + i, lo, hi);
		if (p[2 + i * 2] != lo || p[3 + i * 2] != hi) {
			Dprintk (KERN_INFO "mtrr: Writing %x:%x to 16K MSR%d. lohi were %x:%x\n", p[2 + i * 2], p[3 + i * 2], i, lo, hi );
			wrmsr (MSR_MTRRfix16K_80000 + i, p[2 + i * 2], p[3 + i * 2]);
			changed = TRUE;
		}
	}

	Dprintk (KERN_INFO "mtrr: rdmsr 4K_C0000\n");
	for (i = 0; i < 8; i++) {
		rdmsr (MSR_MTRRfix4K_C0000 + i, lo, hi);
		Dprintk (KERN_INFO "mtrr: MTRRfix4K_C0000+%d = %x:%x\n", i, lo, hi);
		if (p[6 + i * 2] != lo || p[7 + i * 2] != hi) {
			Dprintk (KERN_INFO "mtrr: Writing %x:%x to 4K MSR%d. lohi were %x:%x\n", p[6 + i * 2], p[7 + i * 2], i, lo, hi);
			wrmsr (MSR_MTRRfix4K_C0000 + i, p[6 + i * 2], p[7 + i * 2]);
			changed = TRUE;
		}
	}
	return changed;
}

/* Snapshot of one CPU's MTRR configuration, filled by get_mtrr_state() */
struct mtrr_state {
	unsigned int num_var_ranges;		/* entries in var_ranges */
	struct mtrr_var_range *var_ranges;	/* kmalloc'ed; may be NULL */
	mtrr_type fixed_ranges[NUM_FIXED_RANGES];
	mtrr_type def_type;			/* MTRRdefType bits 0-7 */
	unsigned char enabled;			/* MTRRdefType bits 10-11 */
};

/* Grab all of the MTRR state for this CPU into *state */
static void __init get_mtrr_state (struct mtrr_state *state)
{
	unsigned int nvrs, i;
	struct mtrr_var_range *vrs;
	u32 lo, dummy;

	nvrs = state->num_var_ranges = get_num_var_ranges();
	vrs = state->var_ranges = kmalloc (nvrs * sizeof (struct mtrr_var_range), GFP_KERNEL);
	/* On allocation failure record zero variable ranges rather than fail */
	if (vrs == NULL)
		nvrs = state->num_var_ranges = 0;

	for (i = 0; i < nvrs; i++)
		get_mtrr_var_range (i, &vrs[i]);
	get_fixed_ranges (state->fixed_ranges);

	rdmsr (MSR_MTRRdefType, lo, dummy);
	state->def_type = (lo & 0xff);
	state->enabled = (lo & 0xc00) >> 10;
}

/* Free resources associated with a struct mtrr_state */
static void __init finalize_mtrr_state (struct mtrr_state *state)
{
	if (state->var_ranges)
		kfree (state->var_ranges);
}

/*
 * Set the MTRR state for this CPU.
 * The MTRR state information to read.
 * Some relevant CPU context.
 * [NOTE] The CPU must already be in a safe state for MTRR changes.
 * [RETURNS] 0 if no changes made, else a mask indication what was changed.
*/ static u64 __init set_mtrr_state (struct mtrr_state *state, struct set_mtrr_context *ctxt) { unsigned int i; u64 change_mask = 0; for (i = 0; i < state->num_var_ranges; i++) if (set_mtrr_var_range_testing (i, &state->var_ranges[i])) change_mask |= MTRR_CHANGE_MASK_VARIABLE; if (set_fixed_ranges_testing (state->fixed_ranges)) change_mask |= MTRR_CHANGE_MASK_FIXED; /* Set_mtrr_restore restores the old value of MTRRdefType, so to set it we fiddle with the saved value */ if ((ctxt->deftype_lo & 0xff) != state->def_type || ((ctxt->deftype_lo & 0xc00) >> 10) != state->enabled) { ctxt->deftype_lo |= (state->def_type | state->enabled << 10); change_mask |= MTRR_CHANGE_MASK_DEFTYPE; } return change_mask; } static atomic_t undone_count; static volatile int wait_barrier_mtrr_disable = FALSE; static volatile int wait_barrier_execute = FALSE; static volatile int wait_barrier_cache_enable = FALSE; struct set_mtrr_data { u64 smp_base; u32 smp_size; unsigned int smp_reg; mtrr_type smp_type; }; /* * Synchronisation handler. Executed by "other" CPUs. 
*/ static void ipi_handler (void *info) { struct set_mtrr_data *data = info; struct set_mtrr_context ctxt; set_mtrr_prepare (&ctxt); /* Notify master that I've flushed and disabled my cache */ atomic_dec (&undone_count); while (wait_barrier_mtrr_disable) { rep_nop(); barrier (); } set_mtrr_disable (&ctxt); /* wait again for disable confirmation*/ atomic_dec (&undone_count); while (wait_barrier_execute) { rep_nop(); barrier(); } /* The master has cleared me to execute */ set_mtrr_up (data->smp_reg, data->smp_base, data->smp_size, data->smp_type, FALSE); /* Notify master CPU that I've executed the function */ atomic_dec (&undone_count); /* Wait for master to clear me to enable cache and return */ while (wait_barrier_cache_enable) { rep_nop(); barrier (); } set_mtrr_done (&ctxt); } static void set_mtrr_smp (unsigned int reg, u64 base, u32 size, mtrr_type type) { struct set_mtrr_data data; struct set_mtrr_context ctxt; data.smp_reg = reg; data.smp_base = base; data.smp_size = size; data.smp_type = type; wait_barrier_execute = TRUE; wait_barrier_cache_enable = TRUE; wait_barrier_mtrr_disable = TRUE; atomic_set (&undone_count, smp_num_cpus - 1); /* Start the ball rolling on other CPUs */ if (smp_call_function (ipi_handler, &data, 1, 0) != 0) panic ("mtrr: timed out waiting for other CPUs\n"); /* Flush and disable the local CPU's cache */ set_mtrr_prepare (&ctxt); while (atomic_read (&undone_count) > 0) { rep_nop(); barrier(); } /* Set up for completion wait and then release other CPUs to change MTRRs*/ atomic_set (&undone_count, smp_num_cpus - 1); wait_barrier_mtrr_disable = FALSE; set_mtrr_disable (&ctxt); /* Wait for all other CPUs to flush and disable their caches */ while (atomic_read (&undone_count) > 0) { rep_nop (); barrier (); } /* Set up for completion wait and then release other CPUs to change MTRRs */ atomic_set (&undone_count, smp_num_cpus - 1); wait_barrier_execute = FALSE; set_mtrr_up (reg, base, size, type, FALSE); /* Now wait for other CPUs to complete 
the function */ while (atomic_read (&undone_count) > 0) { rep_nop(); barrier (); } /* Now all CPUs should have finished the function. Release the barrier to allow them to re-enable their caches and return from their interrupt, then enable the local cache and return */ wait_barrier_cache_enable = FALSE; set_mtrr_done (&ctxt); } /* Some BIOS's are fucked and don't set all MTRRs the same! */ static void __init mtrr_state_warn (u32 mask) { if (!mask) return; if (mask & MTRR_CHANGE_MASK_FIXED) printk (KERN_INFO "mtrr: your CPUs had inconsistent fixed MTRR settings\n"); if (mask & MTRR_CHANGE_MASK_VARIABLE) printk (KERN_INFO "mtrr: your CPUs had inconsistent variable MTRR settings\n"); if (mask & MTRR_CHANGE_MASK_DEFTYPE) printk (KERN_INFO "mtrr: your CPUs had inconsistent MTRRdefType settings\n"); printk (KERN_INFO "mtrr: probably your BIOS does not setup all CPUs\n"); } #endif /* CONFIG_SMP */ static inline char * attrib_to_str (int x) { return (x <= 6) ? mtrr_strings[x] : "?"; } static void __init init_table (void) { int i, max; max = get_num_var_ranges (); if ((usage_table = kmalloc (max * sizeof *usage_table, GFP_KERNEL))==NULL) { printk ("mtrr: could not allocate\n"); return; } for (i = 0; i < max; i++) usage_table[i] = 1; #ifdef USERSPACE_INTERFACE if ((ascii_buffer = kmalloc (max * LINE_SIZE, GFP_KERNEL)) == NULL) { printk ("mtrr: could not allocate\n"); return; } ascii_buf_bytes = 0; compute_ascii (); #endif } /* * Get a free MTRR. * returns the index of the region on success, else -1 on error. 
*/
static int get_free_region(void)
{
	int i, max;
	mtrr_type ltype;
	u64 lbase;
	u32 lsize;

	max = get_num_var_ranges ();
	for (i = 0; i < max; ++i) {
		/* A register whose size reads back as 0 is unused */
		get_mtrr (i, &lbase, &lsize, &ltype);
		if (lsize == 0)
			return i;
	}
	/* Every variable range is in use */
	return -ENOSPC;
}

/**
 * mtrr_add_page - Add a memory type region
 * @base: Physical base address of region in pages (4 KB)
 * @size: Physical size of region in pages (4 KB)
 * @type: Type of MTRR desired
 * @increment: If this is true do usage counting on the region
 *
 * Returns the MTRR register on success, else a negative number
 * indicating the error code.
 *
 * Memory type region registers control the caching on newer
 * processors. This function allows drivers to request an MTRR is added.
 * The caller should expect to need to provide a power of two size on
 * an equivalent power of two boundary.
 *
 * If the region cannot be added either because all regions are in use
 * or the CPU cannot support it a negative value is returned. On success
 * the register number for this entry is returned, but should be treated
 * as a cookie only.
 *
 * On a multiprocessor machine the changes are made to all processors.
 *
 * The available types are
 *
 * %MTRR_TYPE_UNCACHABLE - No caching
 * %MTRR_TYPE_WRBACK - Write data back in bursts whenever
 * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
 * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
 *
 * BUGS: Needs a quiet flag for the cases where drivers do not mind
 * failures and do not wish system log messages to be sent.
*/
int mtrr_add_page (u64 base, u32 size, unsigned int type, char increment)
{
	int i, max;
	mtrr_type ltype;
	u64 lbase, last;
	u32 lsize;

	if (base + size < 0x100) {
		printk (KERN_WARNING "mtrr: cannot set region below 1 MiB (0x%Lx000,0x%x000)\n", base, size);
		return -EINVAL;
	}

	/* Disabled AGP aperture conflict warning. NOTE(review): the condition
	   below is incomplete (unbalanced parenthesis) and must be repaired
	   before this block is ever enabled. */
#if 0 && defined(__x86_64__) && defined(CONFIG_AGP)
	{
		agp_kern_info info;
		if (type != MTRR_TYPE_UNCACHABLE && agp_copy_info(&info) >= 0 && base<= info.aper_base && (base<= info.aper_base+info.aper_size*1024*1024)
			printk(KERN_INFO "%s[%d] setting conflicting mtrr into agp aperture\n",current->comm,current->pid);
	}
#endif

	/* Check upper bits of base and last are equal and lower bits are 0
	   for base and 1 for last */
	last = base + size - 1;
	for (lbase = base; !(lbase & 1) && (last & 1); lbase = lbase >> 1, last = last >> 1)
		;

	if (lbase != last) {
		printk (KERN_WARNING "mtrr: base(0x%Lx000) is not aligned on a size(0x%x000) boundary\n", base, size);
		return -EINVAL;
	}

	if (type >= MTRR_NUM_TYPES) {
		printk ("mtrr: type: %u illegal\n", type);
		return -EINVAL;
	}

	/* If the type is WC, check that this processor supports it */
	if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
		printk (KERN_WARNING "mtrr: your processor doesn't support write-combining\n");
		return -ENOSYS;
	}

	if (base & size_or_mask) {
		printk (KERN_WARNING "mtrr: base(%Lx) exceeds the MTRR width(%Lx)\n", base, size_or_mask);
		return -EINVAL;
	}

	if (size & size_or_mask) {
		printk (KERN_WARNING "mtrr: size exceeds the MTRR width\n");
		return -EINVAL;
	}

	increment = increment ? 1 : 0;
	max = get_num_var_ranges ();

	/* Search for existing MTRR */
	down (&mtrr_lock);
	for (i = 0; i < max; ++i) {
		get_mtrr (i, &lbase, &lsize, &ltype);
		/* Skip registers that lie entirely below or above the request */
		if (base >= lbase + lsize)
			continue;
		if ((base < lbase) && (base + size <= lbase))
			continue;

		/* At this point we know there is some kind of overlap/enclosure */
		if ((base < lbase) || (base + size > lbase + lsize)) {
			up (&mtrr_lock);
			printk (KERN_WARNING "mtrr: 0x%Lx000,0x%x000 overlaps existing"
				" 0x%Lx000,0x%x000\n", base, size, lbase, lsize);
			return -EINVAL;
		}

		/* New region is enclosed by an existing region */
		if (ltype != type) {
			/* An uncachable request inside a differently typed
			   region is not a conflict; keep looking */
			if (type == MTRR_TYPE_UNCACHABLE)
				continue;
			up (&mtrr_lock);
			printk ("mtrr: type mismatch for %Lx000,%x000 old: %s new: %s\n",
				base, size, attrib_to_str (ltype), attrib_to_str (type));
			return -EINVAL;
		}

		/* Same type: share the existing register */
		if (increment)
			++usage_table[i];
		compute_ascii ();
		up (&mtrr_lock);
		return i;
	}

	/* Search for an empty MTRR */
	i = get_free_region();
	if (i < 0) {
		up (&mtrr_lock);
		printk ("mtrr: no more MTRRs available\n");
		return i;
	}
	set_mtrr (i, base, size, type);
	usage_table[i] = 1;
	compute_ascii ();
	up (&mtrr_lock);
	return i;
}

/**
 * mtrr_add - Add a memory type region
 * @base: Physical base address of region
 * @size: Physical size of region
 * @type: Type of MTRR desired
 * @increment: If this is true do usage counting on the region
 *
 * Return the MTRR register on success, else a negative number
 * indicating the error code.
 *
 * Memory type region registers control the caching on newer processors.
 * This function allows drivers to request an MTRR is added.
 * The caller should expect to need to provide a power of two size on
 * an equivalent power of two boundary.
 *
 * If the region cannot be added either because all regions are in use
 * or the CPU cannot support it a negative value is returned. On success
 * the register number for this entry is returned, but should be treated
 * as a cookie only.
 *
 * On a multiprocessor machine the changes are made to all processors.
* This is required on x86 by the Intel processors.
 *
 * The available types are
 *
 * %MTRR_TYPE_UNCACHABLE - No caching
 * %MTRR_TYPE_WRBACK - Write data back in bursts whenever
 * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
 * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
 *
 * BUGS: Needs a quiet flag for the cases where drivers do not mind
 * failures and do not wish system log messages to be sent.
 */
int mtrr_add (u64 base, u32 size, unsigned int type, char increment)
{
	/* Byte-granular front end: both values must be whole pages, after
	   which the request is forwarded in page units. */
	int base_aligned = (base & (PAGE_SIZE - 1)) == 0;
	int size_aligned = (size & (PAGE_SIZE - 1)) == 0;

	if (base_aligned && size_aligned)
		return mtrr_add_page (base >> PAGE_SHIFT, size >> PAGE_SHIFT, type, increment);

	printk ("mtrr: size and base must be multiples of 4 kiB\n");
	printk ("mtrr: size: 0x%x base: 0x%Lx\n", size, base);
	return -EINVAL;
}

/**
 * mtrr_del_page - delete a memory type region
 * @reg: Register returned by mtrr_add
 * @base: Physical base address
 * @size: Size of region
 *
 * If register is supplied then base and size are ignored. This is
 * how drivers should call it.
 *
 * Releases an MTRR region. If the usage count drops to zero the
 * register is freed and the region returns to default state.
 * On success the register is returned, on failure a negative error
 * code.
*/
int mtrr_del_page (int reg, u64 base, u32 size)
{
	int i, max;
	mtrr_type ltype;
	u64 lbase;
	u32 lsize;

	max = get_num_var_ranges ();
	down (&mtrr_lock);
	if (reg < 0) {
		/* Search for existing MTRR with exactly this base and size
		   (both compared in pages) */
		for (i = 0; i < max; ++i) {
			get_mtrr (i, &lbase, &lsize, &ltype);
			if (lbase == base && lsize == size) {
				reg = i;
				break;
			}
		}
		if (reg < 0) {
			up (&mtrr_lock);
			printk ("mtrr: no MTRR for %Lx000,%x000 found\n", base, size);
			return -EINVAL;
		}
	}

	if (reg >= max) {
		up (&mtrr_lock);
		printk ("mtrr: register: %d too big\n", reg);
		return -EINVAL;
	}
	get_mtrr (reg, &lbase, &lsize, &ltype);

	if (lsize < 1) {
		up (&mtrr_lock);
		printk ("mtrr: MTRR %d not used\n", reg);
		return -EINVAL;
	}

	if (usage_table[reg] < 1) {
		up (&mtrr_lock);
		printk ("mtrr: reg: %d has count=0\n", reg);
		return -EINVAL;
	}

	/* Drop one reference; the last one disables the range (size 0) */
	if (--usage_table[reg] < 1)
		set_mtrr (reg, 0, 0, 0);
	compute_ascii ();
	up (&mtrr_lock);
	return reg;
}

/**
 * mtrr_del - delete a memory type region
 * @reg: Register returned by mtrr_add
 * @base: Physical base address
 * @size: Size of region
 *
 * If register is supplied then base and size are ignored. This is
 * how drivers should call it.
 *
 * Releases an MTRR region. If the usage count drops to zero the
 * register is freed and the region returns to default state.
 * On success the register is returned, on failure a negative error
 * code.
*/ int mtrr_del (int reg, u64 base, u32 size) { if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { printk ("mtrr: size and base must be multiples of 4 kiB\n"); printk ("mtrr: size: 0x%x base: 0x%Lx\n", size, base); return -EINVAL; } return mtrr_del_page (reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT); } #ifdef USERSPACE_INTERFACE static int mtrr_file_add (u64 base, u32 size, unsigned int type, struct file *file, int page) { int reg, max; unsigned int *fcount = file->private_data; max = get_num_var_ranges (); if (fcount == NULL) { if ((fcount = kmalloc (max * sizeof *fcount, GFP_KERNEL)) == NULL) { printk ("mtrr: could not allocate\n"); return -ENOMEM; } memset (fcount, 0, max * sizeof *fcount); file->private_data = fcount; } if (!page) { if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { printk (KERN_INFO "mtrr: size and base must be multiples of 4 kiB\n"); printk (KERN_INFO "mtrr: size: 0x%x base: 0x%Lx\n", size, base); return -EINVAL; } base >>= PAGE_SHIFT; size >>= PAGE_SHIFT; } reg = mtrr_add_page (base, size, type, 1); if (reg >= 0) ++fcount[reg]; return reg; } static int mtrr_file_del (u64 base, u32 size, struct file *file, int page) { int reg; unsigned int *fcount = file->private_data; if (!page) { if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { printk (KERN_INFO "mtrr: size and base must be multiples of 4 kiB\n"); printk (KERN_INFO "mtrr: size: 0x%x base: 0x%Lx\n", size, base); return -EINVAL; } base >>= PAGE_SHIFT; size >>= PAGE_SHIFT; } reg = mtrr_del_page (-1, base, size); if (reg < 0) return reg; if (fcount == NULL) return reg; if (fcount[reg] < 1) return -EINVAL; --fcount[reg]; return reg; } static ssize_t mtrr_read (struct file *file, char *buf, size_t len, loff_t * ppos) { loff_t n = *ppos; unsigned pos = n; if (pos != n || pos >= ascii_buf_bytes) return 0; if (len > ascii_buf_bytes - pos) len = ascii_buf_bytes - pos; if (copy_to_user (buf, ascii_buffer + pos, len)) return -EFAULT; *ppos = pos + len; return len; } static 
ssize_t mtrr_write (struct file *file, const char *buf, size_t len, loff_t * ppos) /* Format of control line: "base=%Lx size=%Lx type=%s" OR: "disable=%d" */ { int i, err, reg; u64 base; u32 size; char *ptr; char line[LINE_SIZE]; if (!capable(CAP_SYS_ADMIN)) return -EPERM; /* Can't seek (pwrite) on this device */ if (ppos != &file->f_pos) return -ESPIPE; memset (line, 0, LINE_SIZE); if (len > LINE_SIZE) len = LINE_SIZE; if (copy_from_user (line, buf, len - 1)) return -EFAULT; ptr = line + strlen (line) - 1; if (*ptr == '\n') *ptr = '\0'; if (!strncmp (line, "disable=", 8)) { reg = simple_strtoul (line + 8, &ptr, 0); err = mtrr_del_page (reg, 0, 0); if (err < 0) return err; return len; } if (strncmp (line, "base=", 5)) { printk (KERN_INFO "mtrr: no \"base=\" in line: \"%s\"\n", line); return -EINVAL; } base = simple_strtoull (line + 5, &ptr, 0); for (; isspace (*ptr); ++ptr) ; if (strncmp (ptr, "size=", 5)) { printk (KERN_INFO "mtrr: no \"size=\" in line: \"%s\"\n", line); return -EINVAL; } size = simple_strtoull (ptr + 5, &ptr, 0); if ((base & 0xfff) || (size & 0xfff)) { printk (KERN_INFO "mtrr: size and base must be multiples of 4 kiB\n"); printk (KERN_INFO "mtrr: size: 0x%x base: 0x%Lx\n", size, base); return -EINVAL; } for (; isspace (*ptr); ++ptr) ; if (strncmp (ptr, "type=", 5)) { printk (KERN_INFO "mtrr: no \"type=\" in line: \"%s\"\n", line); return -EINVAL; } ptr += 5; for (; isspace (*ptr); ++ptr) ; for (i = 0; i < MTRR_NUM_TYPES; ++i) { if (strcmp (ptr, mtrr_strings[i])) continue; base >>= PAGE_SHIFT; size >>= PAGE_SHIFT; err = mtrr_add_page ((u64) base, size, i, 1); if (err < 0) return err; return len; } printk (KERN_INFO "mtrr: illegal type: \"%s\"\n", ptr); return -EINVAL; } static int mtrr_ioctl (struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { int err; mtrr_type type; struct mtrr_sentry sentry; struct mtrr_gentry gentry; switch (cmd) { default: return -ENOIOCTLCMD; case MTRRIOC_ADD_ENTRY: if (!capable(CAP_SYS_ADMIN)) 
return -EPERM; if (copy_from_user (&sentry, (void *) arg, sizeof sentry)) return -EFAULT; err = mtrr_file_add (sentry.base, sentry.size, sentry.type, file, 0); if (err < 0) return err; break; case MTRRIOC_SET_ENTRY: if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (copy_from_user (&sentry, (void *) arg, sizeof sentry)) return -EFAULT; err = mtrr_add (sentry.base, sentry.size, sentry.type, 0); if (err < 0) return err; break; case MTRRIOC_DEL_ENTRY: if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (copy_from_user (&sentry, (void *) arg, sizeof sentry)) return -EFAULT; err = mtrr_file_del (sentry.base, sentry.size, file, 0); if (err < 0) return err; break; case MTRRIOC_KILL_ENTRY: if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (copy_from_user (&sentry, (void *) arg, sizeof sentry)) return -EFAULT; err = mtrr_del (-1, sentry.base, sentry.size); if (err < 0) return err; break; case MTRRIOC_GET_ENTRY: if (copy_from_user (&gentry, (void *) arg, sizeof gentry)) return -EFAULT; if (gentry.regnum >= get_num_var_ranges ()) return -EINVAL; get_mtrr (gentry.regnum, (u64*) &gentry.base, &gentry.size, &type); /* Hide entries that go above 4GB */ if (gentry.base + gentry.size > 0x100000 || gentry.size == 0x100000) gentry.base = gentry.size = gentry.type = 0; else { gentry.base <<= PAGE_SHIFT; gentry.size <<= PAGE_SHIFT; gentry.type = type; } if (copy_to_user ((void *) arg, &gentry, sizeof gentry)) return -EFAULT; break; case MTRRIOC_ADD_PAGE_ENTRY: if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (copy_from_user (&sentry, (void *) arg, sizeof sentry)) return -EFAULT; err = mtrr_file_add (sentry.base, sentry.size, sentry.type, file, 1); if (err < 0) return err; break; case MTRRIOC_SET_PAGE_ENTRY: if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (copy_from_user (&sentry, (void *) arg, sizeof sentry)) return -EFAULT; err = mtrr_add_page (sentry.base, sentry.size, sentry.type, 0); if (err < 0) return err; break; case MTRRIOC_DEL_PAGE_ENTRY: if (!capable(CAP_SYS_ADMIN)) return -EPERM; if 
(copy_from_user (&sentry, (void *) arg, sizeof sentry)) return -EFAULT; err = mtrr_file_del (sentry.base, sentry.size, file, 1); if (err < 0) return err; break; case MTRRIOC_KILL_PAGE_ENTRY: if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (copy_from_user (&sentry, (void *) arg, sizeof sentry)) return -EFAULT; err = mtrr_del_page (-1, sentry.base, sentry.size); if (err < 0) return err; break; case MTRRIOC_GET_PAGE_ENTRY: if (copy_from_user (&gentry, (void *) arg, sizeof gentry)) return -EFAULT; if (gentry.regnum >= get_num_var_ranges ()) return -EINVAL; get_mtrr (gentry.regnum, (u64*) &gentry.base, &gentry.size, &type); gentry.type = type; if (copy_to_user ((void *) arg, &gentry, sizeof gentry)) return -EFAULT; break; } return 0; } static int mtrr_close (struct inode *ino, struct file *file) { int i, max; unsigned int *fcount = file->private_data; if (fcount == NULL) return 0; lock_kernel (); max = get_num_var_ranges (); for (i = 0; i < max; ++i) { while (fcount[i] > 0) { if (mtrr_del (i, 0, 0) < 0) printk ("mtrr: reg %d not used\n", i); --fcount[i]; } } unlock_kernel (); kfree (fcount); file->private_data = NULL; return 0; } static struct file_operations mtrr_fops = { owner: THIS_MODULE, read: mtrr_read, write: mtrr_write, ioctl: mtrr_ioctl, release:mtrr_close, }; #ifdef CONFIG_PROC_FS static struct proc_dir_entry *proc_root_mtrr; #endif static devfs_handle_t devfs_handle; static void compute_ascii (void) { char factor; int i, max; mtrr_type type; u64 base; u32 size; ascii_buf_bytes = 0; max = get_num_var_ranges (); for (i = 0; i < max; i++) { get_mtrr (i, &base, &size, &type); if (size == 0) usage_table[i] = 0; else { if (size < (0x100000 >> PAGE_SHIFT)) { /* less than 1MB */ factor = 'K'; size <<= PAGE_SHIFT - 10; } else { factor = 'M'; size >>= 20 - PAGE_SHIFT; } sprintf (ascii_buffer + ascii_buf_bytes, "reg%02i: base=0x%05Lx000 (%4iMB), size=%4i%cB: %s, count=%d\n", i, base, (u32) base >> (20 - PAGE_SHIFT), size, factor, attrib_to_str (type), usage_table[i]); 
ascii_buf_bytes += strlen (ascii_buffer + ascii_buf_bytes); } } devfs_set_file_size (devfs_handle, ascii_buf_bytes); #ifdef CONFIG_PROC_FS if (proc_root_mtrr) proc_root_mtrr->size = ascii_buf_bytes; #endif } #endif /* USERSPACE_INTERFACE */ EXPORT_SYMBOL (mtrr_add); EXPORT_SYMBOL (mtrr_del); static void __init mtrr_setup (void) { printk ("mtrr: v%s)\n", MTRR_VERSION); if (test_bit (X86_FEATURE_MTRR, boot_cpu_data.x86_capability)) { /* Query the width (in bits) of the physical addressable memory. This is an AMD specific MSR, but we assume(hope?) Intel will implement it too when they extend the width of the Xeon address bus. */ if (cpuid_eax (0x80000000) >= 0x80000008) { u32 phys_addr; phys_addr = cpuid_eax (0x80000008) & 0xff; size_or_mask = ~((1L << (phys_addr - PAGE_SHIFT)) - 1); /* * top bits MBZ as its beyond the addressable range. * bottom bits MBZ as we don't care about lower 12 bits of addr. */ size_and_mask = ~size_or_mask & 0xfff00000; } else { /* 36bit fallback */ size_or_mask = 0xff000000; size_and_mask = 0x00f00000; } } } #ifdef CONFIG_SMP static volatile u32 smp_changes_mask __initdata = 0; static struct mtrr_state smp_mtrr_state __initdata = { 0, 0 }; void __init mtrr_init_boot_cpu (void) { mtrr_setup(); get_mtrr_state (&smp_mtrr_state); } void __init mtrr_init_secondary_cpu (void) { u64 mask; int count; struct set_mtrr_context ctxt; /* Note that this is not ideal, since the cache is only flushed/disabled for this CPU while the MTRRs are changed, but changing this requires more invasive changes to the way the kernel boots */ set_mtrr_prepare (&ctxt); set_mtrr_disable (&ctxt); mask = set_mtrr_state (&smp_mtrr_state, &ctxt); set_mtrr_done (&ctxt); /* Use the atomic bitops to update the global mask */ for (count = 0; count < sizeof mask * 8; ++count) { if (mask & 0x01) set_bit (count, &smp_changes_mask); mask >>= 1; } } #endif /* CONFIG_SMP */ int __init mtrr_init (void) { #ifdef CONFIG_SMP /* mtrr_setup() should already have been called from 
mtrr_init_boot_cpu() */ finalize_mtrr_state (&smp_mtrr_state); mtrr_state_warn (smp_changes_mask); #else mtrr_setup(); #endif #ifdef CONFIG_PROC_FS proc_root_mtrr = create_proc_entry ("mtrr", S_IWUSR | S_IRUGO, &proc_root); if (proc_root_mtrr) { proc_root_mtrr->owner = THIS_MODULE; proc_root_mtrr->proc_fops = &mtrr_fops; } #endif #ifdef CONFIG_DEVFS_FS devfs_handle = devfs_register (NULL, "cpu/mtrr", DEVFS_FL_DEFAULT, 0, 0, S_IFREG | S_IRUGO | S_IWUSR, &mtrr_fops, NULL); #endif init_table (); return 0; }