/* * Copyright (C) 1993-1996 Bas Laarhoven. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; see the file COPYING. If not, write to the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. * * $Source: /homes/cvs/ftape-stacked/ftape/lowlevel/ftape-calibr.c,v $ * $Revision: 1.2 $ * $Date: 1997/10/05 19:18:08 $ * * GP calibration routine for processor speed dependent * functions. */ #include #include #include #include #include #if defined(__alpha__) # include #elif defined(__x86_64__) # include # include #elif defined(__i386__) # include #endif #include #include "../lowlevel/ftape-tracing.h" #include "../lowlevel/ftape-calibr.h" #include "../lowlevel/fdc-io.h" #undef DEBUG #if !defined(__alpha__) && !defined(__i386__) && !defined(__x86_64__) # error Ftape is not implemented for this architecture! #endif #if defined(__alpha__) || defined(__x86_64__) static unsigned long ps_per_cycle = 0; #endif #if defined(__i386__) extern spinlock_t i8253_lock; #endif /* * Note: On Intel PCs, the clock ticks at 100 Hz (HZ==100) which is * too slow for certain timeouts (and that clock doesn't even tick * when interrupts are disabled). For that reason, the 8254 timer is * used directly to implement fine-grained timeouts. However, on * Alpha PCs, the 8254 is *not* used to implement the clock tick * (which is 1024 Hz, normally) and the 8254 timer runs at some * "random" frequency (it seems to run at 18Hz, but its not safe to * rely on this value). Instead, we use the Alpha's "rpcc" * instruction to read cycle counts. As this is a 32 bit counter, * it will overflow only once per 30 seconds (on a 200MHz machine), * which is plenty. */ unsigned int ftape_timestamp(void) { #if defined(__alpha__) unsigned long r; asm volatile ("rpcc %0" : "=r" (r)); return r; #elif defined(__x86_64__) unsigned long r; rdtscl(r); return r; #elif defined(__i386__) /* * Note that there is some time between counter underflowing and jiffies * increasing, so the code below won't always give correct output. * -Vojtech */ unsigned long flags; __u16 lo; __u16 hi; spin_lock_irqsave(&i8253_lock, flags); outb_p(0x00, 0x43); /* latch the count ASAP */ lo = inb_p(0x40); /* read the latched count */ lo |= inb(0x40) << 8; hi = jiffies; spin_unlock_irqrestore(&i8253_lock, flags); return ((hi + 1) * (unsigned int) LATCH) - lo; /* downcounter ! */ #endif } static unsigned int short_ftape_timestamp(void) { #if defined(__alpha__) || defined(__x86_64__) return ftape_timestamp(); #elif defined(__i386__) unsigned int count; unsigned long flags; spin_lock_irqsave(&i8253_lock, flags); outb_p(0x00, 0x43); /* latch the count ASAP */ count = inb_p(0x40); /* read the latched count */ count |= inb(0x40) << 8; spin_unlock_irqrestore(&i8253_lock, flags); return (LATCH - count); /* normal: downcounter */ #endif } static unsigned int diff(unsigned int t0, unsigned int t1) { #if defined(__alpha__) || defined(__x86_64__) return (t1 - t0); #elif defined(__i386__) /* * This is tricky: to work for both short and full ftape_timestamps * we'll have to discriminate between these. * If it _looks_ like short stamps with wrapping around we'll * asume it are. This will generate a small error if it really * was a (very large) delta from full ftape_timestamps. */ return (t1 <= t0 && t0 <= LATCH) ? t1 + LATCH - t0 : t1 - t0; #endif } static unsigned int usecs(unsigned int count) { #if defined(__alpha__) || defined(__x86_64__) return (ps_per_cycle * count) / 1000000UL; #elif defined(__i386__) return (10000 * count) / ((CLOCK_TICK_RATE + 50) / 100); #endif } unsigned int ftape_timediff(unsigned int t0, unsigned int t1) { /* * Calculate difference in usec for ftape_timestamp results t0 & t1. * Note that on the i386 platform with short time-stamps, the * maximum allowed timespan is 1/HZ or we'll lose ticks! */ return usecs(diff(t0, t1)); } /* To get an indication of the I/O performance, * measure the duration of the inb() function. */ static void time_inb(void) { int i; int t0, t1; unsigned long flags; int status; TRACE_FUN(ft_t_any); save_flags(flags); cli(); t0 = short_ftape_timestamp(); for (i = 0; i < 1000; ++i) status = inb(fdc.msr); t1 = short_ftape_timestamp(); restore_flags(flags); TRACE(ft_t_info, "inb() duration: %d nsec", ftape_timediff(t0, t1)); TRACE_EXIT; } static void init_clock(void) { TRACE_FUN(ft_t_any); #if defined(__x86_64__) ps_per_cycle = 1000000000UL / cpu_khz; #elif defined(__alpha__) extern struct hwrpb_struct *hwrpb; ps_per_cycle = (1000*1000*1000*1000UL) / hwrpb->cycle_freq; #endif TRACE_EXIT; } /* * Input: function taking int count as parameter. * pointers to calculated calibration variables. */ void ftape_calibrate(char *name, void (*fun) (unsigned int), unsigned int *calibr_count, unsigned int *calibr_time) { static int first_time = 1; int i; unsigned int tc = 0; unsigned int count; unsigned int time; #if defined(__i386__) unsigned int old_tc = 0; unsigned int old_count = 1; unsigned int old_time = 1; #endif TRACE_FUN(ft_t_flow); if (first_time) { /* get idea of I/O performance */ init_clock(); time_inb(); first_time = 0; } /* value of timeout must be set so that on very slow systems * it will give a time less than one jiffy, and on * very fast systems it'll give reasonable precision. */ count = 40; for (i = 0; i < 15; ++i) { unsigned int t0; unsigned int t1; unsigned int once; unsigned int multiple; unsigned long flags; *calibr_count = *calibr_time = count; /* set TC to 1 */ save_flags(flags); cli(); fun(0); /* dummy, get code into cache */ t0 = short_ftape_timestamp(); fun(0); /* overhead + one test */ t1 = short_ftape_timestamp(); once = diff(t0, t1); t0 = short_ftape_timestamp(); fun(count); /* overhead + count tests */ t1 = short_ftape_timestamp(); multiple = diff(t0, t1); restore_flags(flags); time = ftape_timediff(0, multiple - once); tc = (1000 * time) / (count - 1); TRACE(ft_t_any, "once:%3d us,%6d times:%6d us, TC:%5d ns", usecs(once), count - 1, usecs(multiple), tc); #if defined(__alpha__) || defined(__x86_64__) /* * Increase the calibration count exponentially until the * calibration time exceeds 100 ms. */ if (time >= 100*1000) break; #elif defined(__i386__) /* * increase the count until the resulting time nears 2/HZ, * then the tc will drop sharply because we lose LATCH counts. */ if (tc <= old_tc / 2) { time = old_time; count = old_count; break; } old_tc = tc; old_count = count; old_time = time; #endif count *= 2; } *calibr_count = count - 1; *calibr_time = time; TRACE(ft_t_info, "TC for `%s()' = %d nsec (at %d counts)", name, (1000 * *calibr_time) / *calibr_count, *calibr_count); TRACE_EXIT; }