Block io priorities
===================


Intro
-----

With the introduction of cfq v3 (aka cfq-ts or time sliced cfq), basic io
priorities are supported for reads on files. This enables users to io nice
processes or process groups, similar to what has been possible with cpu
scheduling for ages. This document mainly details the current possibilities
with cfq; other io schedulers do not support io priorities thus far.

Scheduling classes
------------------

CFQ implements three generic scheduling classes that determine how io is
served for a process.

IOPRIO_CLASS_RT: This is the realtime io class. This scheduling class is given
higher priority than any other in the system; processes from this class are
given first access to the disk every time. Thus it needs to be used with some
care: one io RT process can starve the entire system. Within the RT class,
there are 8 levels of class data that determine exactly how much time this
process needs the disk for on each service. In the future this might change
to be more directly mappable to performance, by passing in a wanted data
rate instead.

IOPRIO_CLASS_BE: This is the best-effort scheduling class, which is the default
for any process that hasn't set a specific io priority. The class data
determines how much io bandwidth the process will get; it's directly mappable
to the cpu nice levels, just more coarsely implemented. 0 is the highest
BE prio level, 7 is the lowest. The mapping between cpu nice level and io
nice level is determined as: io_nice = (cpu_nice + 20) / 5.

IOPRIO_CLASS_IDLE: This is the idle scheduling class; processes running at this
level only get io time when no one else needs the disk. The idle class has no
class data, since it doesn't really apply here.

Tools
-----

See below for a sample ionice tool. Usage:

# ionice -c<class> -n<level> -p<pid>

If pid isn't given, the current process is assumed.
/*
 * IO priority settings are inherited on fork, so you can use ionice to start
 * the process at a given level:
 *
 * # ionice -c2 -n0 /bin/ls
 *
 * will run ls at the best-effort scheduling class at the highest priority.
 * For a running process, you can give the pid instead:
 *
 * # ionice -c1 -n2 -p100
 *
 * will change pid 100 to run at the realtime scheduling class, at priority 2.
 */

/* ---> snip ionice.c tool <--- */

/* <unistd.h> only declares syscall() when a feature-test macro asks for it. */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <limits.h>
#include <getopt.h>
#include <unistd.h>
#include <sys/ptrace.h>
#include <asm/unistd.h>

/*
 * Prefer the syscall numbers supplied by the system headers. The hard-coded
 * table below is only a fallback for headers that do not define them.
 */
#ifndef __NR_ioprio_set
#if defined(__i386__)
#define __NR_ioprio_set		289
#define __NR_ioprio_get		290
#elif defined(__ppc__) || defined(__powerpc__)
#define __NR_ioprio_set		273
#define __NR_ioprio_get		274
#elif defined(__x86_64__)
#define __NR_ioprio_set		251
#define __NR_ioprio_get		252
#elif defined(__ia64__)
#define __NR_ioprio_set		1274
#define __NR_ioprio_get		1275
#else
#error "Unsupported arch"
#endif
#endif

/*
 * Thin wrapper around the ioprio_set syscall.
 * Returns the syscall result: -1 with errno set on failure.
 */
static inline int ioprio_set(int which, int who, int ioprio)
{
	return (int)syscall(__NR_ioprio_set, which, who, ioprio);
}

/*
 * Thin wrapper around the ioprio_get syscall.
 * Returns the packed io priority, or -1 with errno set on failure.
 */
static inline int ioprio_get(int which, int who)
{
	return (int)syscall(__NR_ioprio_get, which, who);
}

enum {
	IOPRIO_CLASS_NONE,
	IOPRIO_CLASS_RT,
	IOPRIO_CLASS_BE,
	IOPRIO_CLASS_IDLE,
};

enum {
	IOPRIO_WHO_PROCESS = 1,
	IOPRIO_WHO_PGRP,
	IOPRIO_WHO_USER,
};

/* A packed io priority keeps the class above this shift, the level below. */
#define IOPRIO_CLASS_SHIFT	13

/* Valid class data levels are 0 (highest) .. IOPRIO_NR_LEVELS - 1 (lowest). */
#define IOPRIO_NR_LEVELS	8

/* Printable names, indexed by IOPRIO_CLASS_* value. */
static const char *const to_prio[] = { "none", "realtime", "best-effort", "idle", };

#define NR_PRIO_NAMES	((int)(sizeof(to_prio) / sizeof(to_prio[0])))

/*
 * Parse a complete decimal integer from text.
 * Prints a diagnostic naming 'what' and exits with status 1 when text is
 * empty, has trailing garbage, or does not fit in an int.
 */
static int parse_int(const char *text, const char *what)
{
	char *end;
	long value;

	errno = 0;
	value = strtol(text, &end, 10);
	if (end == text || *end != '\0' || errno == ERANGE ||
	    value < INT_MIN || value > INT_MAX) {
		fprintf(stderr, "ionice: invalid %s '%s'\n", what, text);
		exit(1);
	}

	return (int)value;
}

/*
 * ionice [-c class] [-n level] [-p pid] [command [args...]]
 *
 * Without -c/-n: print the io priority of the given pid (from -p or the
 * first positional argument; 0 selects the calling process).
 * With -c and/or -n: set the io priority of the pid, then exec the command
 * if one was given. Returns 0 on success, 1 on any error.
 */
int main(int argc, char *argv[])
{
	int ioprio = 4, set = 0, ioprio_class = IOPRIO_CLASS_BE;
	int c, pid = 0;

	/* Leading '+' stops option parsing at the first non-option argument. */
	while ((c = getopt(argc, argv, "+n:c:p:")) != -1) {
		switch (c) {
		case 'n':
			ioprio = parse_int(optarg, "priority level");
			set = 1;
			break;
		case 'c':
			ioprio_class = parse_int(optarg, "priority class");
			set = 1;
			break;
		case 'p':
			pid = parse_int(optarg, "pid");
			break;
		default:
			/* getopt() has already reported the offending option. */
			fprintf(stderr, "usage: %s [-c class] [-n level] [-p pid] [command [args...]]\n",
				argv[0]);
			return 1;
		}
	}

	if (!set) {
		if (!pid && argv[optind])
			pid = parse_int(argv[optind], "pid");

		ioprio = ioprio_get(IOPRIO_WHO_PROCESS, pid);
		if (ioprio == -1) {
			/* Report before any other call can overwrite errno. */
			perror("ioprio_get");
			return 1;
		}

		printf("pid=%d, %d\n", pid, ioprio);

		ioprio_class = ioprio >> IOPRIO_CLASS_SHIFT;
		ioprio = ioprio & 0xff;

		/* The class comes from the kernel; never index past the table. */
		if (ioprio_class >= 0 && ioprio_class < NR_PRIO_NAMES)
			printf("%s: prio %d\n", to_prio[ioprio_class], ioprio);
		else
			printf("unknown class %d: prio %d\n", ioprio_class, ioprio);

		return 0;
	}

	switch (ioprio_class) {
	case IOPRIO_CLASS_NONE:
		ioprio_class = IOPRIO_CLASS_BE;
		break;
	case IOPRIO_CLASS_RT:
	case IOPRIO_CLASS_BE:
		break;
	case IOPRIO_CLASS_IDLE:
		/* The idle class has no class data; any -n value is overridden. */
		ioprio = 7;
		break;
	default:
		fprintf(stderr, "bad prio class %d\n", ioprio_class);
		return 1;
	}

	/* An out-of-range level would spill into the class bits when packed. */
	if (ioprio < 0 || ioprio >= IOPRIO_NR_LEVELS) {
		fprintf(stderr, "bad prio level %d\n", ioprio);
		return 1;
	}

	if (ioprio_set(IOPRIO_WHO_PROCESS, pid,
		       ioprio | ioprio_class << IOPRIO_CLASS_SHIFT) == -1) {
		perror("ioprio_set");
		return 1;
	}

	if (argv[optind]) {
		execvp(argv[optind], &argv[optind]);
		/* execvp() only returns on failure. */
		perror("execvp");
		return 1;
	}

	return 0;
}

/* ---> snip ionice.c tool <--- */

/* March 11 2005, Jens Axboe <jens.axboe@oracle.com> */