/*
 * SMP support for ppc.
 *
 * Written by Cort Dougan (cort@cs.nmt.edu) borrowing a great
 * deal of code from the sparc and intel versions.
 *
 * Copyright (C) 1999 Cort Dougan
 *
 * PowerPC-64 Support added by Dave Engebretsen, Peter Bergner, and
 * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
#include <linux/delay.h>
#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/cache.h>

#include <asm/ptrace.h>
#include <asm/atomic.h>
#include <asm/irq.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/smp.h>
#include <asm/naca.h>
#include <asm/paca.h>
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/time.h>
#include <asm/rtas.h>
#include <asm/iSeries/LparData.h>
#include <asm/iSeries/HvCall.h>
#include <asm/iSeries/HvCallCfg.h>
#include <asm/ppcdebug.h>
#include "open_pic.h"
#include <asm/machdep.h>
#include <asm/cputable.h>

#if defined(CONFIG_DUMP) || defined(CONFIG_DUMP_MODULE)
int (*dump_ipi_function_ptr)(struct pt_regs *);
#include <linux/dump.h>
#endif

#ifdef CONFIG_KDB
#include <linux/kdb.h>
#endif

int smp_threads_ready = 0;
volatile int smp_commenced = 0;
int smp_num_cpus = 1;
int smp_tb_synchronized = 0;
extern atomic_t ipi_recv;
extern atomic_t ipi_sent;
spinlock_t kernel_flag __cacheline_aligned = SPIN_LOCK_UNLOCKED;
cycles_t cacheflush_time;
static int max_cpus __initdata = NR_CPUS;

unsigned long cpu_online_map;

volatile unsigned long cpu_callin_map[NR_CPUS] = {0,};

#define TB_SYNC_PASSES 4
volatile unsigned long __initdata tb_sync_flag = 0;
volatile unsigned long __initdata tb_offset = 0;

extern unsigned char stab_array[];

int start_secondary(void *);
extern int cpu_idle(void *unused);
void smp_call_function_interrupt(void);
void smp_message_pass(int target, int msg, unsigned long data, int wait);

static unsigned long iSeries_smp_message[NR_CPUS];

void xics_setup_cpu(void);
void xics_cause_IPI(int cpu);
long h_register_vpa(unsigned long flags, unsigned long proc, unsigned long vpa);

/*
 * XICS only has a single IPI, so encode the messages per CPU
 */
volatile unsigned long xics_ipi_message[NR_CPUS] = {0};
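
/*
 * Illustrative sketch only, not part of the original file: the receive side
 * of the per-CPU message word above lives in the XICS interrupt code, but it
 * would drain the bits roughly as below, mirroring iSeries_smp_message_recv()
 * later in this file.  The name xics_ipi_drain_example() is hypothetical.
 */
#if 0
static void xics_ipi_drain_example(struct pt_regs *regs)
{
	int cpu = smp_processor_id();
	int msg;

	/* one bit per PPC_MSG_* type, set by smp_xics_message_pass() */
	for (msg = 0; msg < 4; ++msg)
		if (test_and_clear_bit(msg, &xics_ipi_message[cpu]))
			smp_message_recv(msg, regs);
}
#endif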

#define smp_message_pass(t,m,d,w) \
	do { atomic_inc(&ipi_sent); \
	     ppc_md.smp_message_pass((t),(m),(d),(w)); \
	} while(0)

#ifdef CONFIG_KDB
/* save regs here before calling kdb_ipi */
struct pt_regs *kdb_smp_regs[NR_CPUS];

/* called for each processor.. drop each into kdb */
void smp_kdb_stop_proc(void)
{
	kdb_ipi(kdb_smp_regs[smp_processor_id()], NULL);
}

void smp_kdb_stop(void)
{
	int ret = 0;

	ret = smp_call_function(smp_kdb_stop_proc, NULL, 1, 0);
}
#endif

static inline void set_tb(unsigned int upper, unsigned int lower)
{
	mtspr(SPRN_TBWL, 0);
	mtspr(SPRN_TBWU, upper);
	mtspr(SPRN_TBWL, lower);
}

void iSeries_smp_message_recv( struct pt_regs * regs )
{
	int cpu = smp_processor_id();
	int msg;

	if ( smp_num_cpus < 2 )
		return;

	for ( msg = 0; msg < 4; ++msg )
		if ( test_and_clear_bit( msg, &iSeries_smp_message[cpu] ) )
			smp_message_recv( msg, regs );
}

static void smp_iSeries_message_pass(int target, int msg, unsigned long data, int wait)
{
	int i;

	for (i = 0; i < smp_num_cpus; ++i) {
		if ( (target == MSG_ALL) ||
		     (target == i) ||
		     ((target == MSG_ALL_BUT_SELF) && (i != smp_processor_id())) ) {
			set_bit( msg, &iSeries_smp_message[i] );
			HvCall_sendIPI(&(paca[i]));
		}
	}
}

static int smp_iSeries_numProcs(void)
{
	unsigned np, i;
	struct ItLpPaca * lpPaca;

	np = 0;
	for (i=0; i < MAX_PACAS; ++i) {
		lpPaca = paca[i].xLpPacaPtr;
		if ( lpPaca->xDynProcStatus < 2 ) {
			++np;
		}
	}
	return np;
}

static int smp_iSeries_probe(void)
{
	unsigned i;
	unsigned np;
	struct ItLpPaca * lpPaca;

	np = 0;
	for (i=0; i < MAX_PACAS; ++i) {
		lpPaca = paca[i].xLpPacaPtr;
		if ( lpPaca->xDynProcStatus < 2 ) {
			++np;
			paca[i].next_jiffy_update_tb = paca[0].next_jiffy_update_tb;
		}
	}

	smp_tb_synchronized = 1;
	return np;
}

static void smp_iSeries_kick_cpu(int nr)
{
	struct ItLpPaca * lpPaca;

	/* Verify we have a Paca for processor nr */
	if ( ( nr <= 0 ) || ( nr >= MAX_PACAS ) )
		return;

	/* Verify that our partition has a processor nr */
	lpPaca = paca[nr].xLpPacaPtr;
	if ( lpPaca->xDynProcStatus >= 2 )
		return;

	/* The information for processor bringup must
	 * be written out to main store before we release
	 * the processor.
	 */
	mb();

	/* The processor is currently spinning, waiting
	 * for the xProcStart field to become non-zero.
	 * After we set xProcStart, the processor will
	 * continue on to secondary_start in iSeries_head.S
	 */
	paca[nr].xProcStart = 1;
}

static void smp_iSeries_setup_cpu(int nr)
{
}

/* This is called very early. */
void smp_init_iSeries(void)
{
	ppc_md.smp_message_pass = smp_iSeries_message_pass;
	ppc_md.smp_probe        = smp_iSeries_probe;
	ppc_md.smp_kick_cpu     = smp_iSeries_kick_cpu;
	ppc_md.smp_setup_cpu    = smp_iSeries_setup_cpu;

	systemcfg->processorCount = smp_iSeries_numProcs();
}

static void smp_openpic_message_pass(int target, int msg, unsigned long data, int wait)
{
	/* make sure we're sending something that translates to an IPI */
	if ( msg > 0x3 ) {
		printk("SMP %d: smp_message_pass: unknown msg %d\n",
		       smp_processor_id(), msg);
		return;
	}
	switch ( target ) {
	case MSG_ALL:
		openpic_cause_IPI(msg, 0xffffffff);
		break;
	case MSG_ALL_BUT_SELF:
		openpic_cause_IPI(msg, 0xffffffff & ~(1 << smp_processor_id()));
		break;
	default:
		openpic_cause_IPI(msg, 1 << target);
		break;
	}
}

static int smp_chrp_probe(void)
{
	if (systemcfg->processorCount > 1)
		openpic_request_IPIs();

	return systemcfg->processorCount;
}

static void smp_kick_cpu(int nr)
{
	/* Verify we have a Paca for processor nr */
	if ( ( nr <= 0 ) || ( nr >= MAX_PACAS ) )
		return;

	/* The information for processor bringup must
	 * be written out to main store before we release
	 * the processor.
	 */
	mb();

	/* The processor is currently spinning, waiting
	 * for the xProcStart field to become non-zero.
	 * After we set xProcStart, the processor will
	 * continue on to secondary_start in iSeries_head.S
	 */
	paca[nr].xProcStart = 1;
}
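
/*
 * Illustrative sketch only, not part of the original file: the other half of
 * the xProcStart handshake above is assembly (secondary_start), but in C it
 * amounts to roughly the following.  secondary_spin_example() is a
 * hypothetical name.
 */
#if 0
static void secondary_spin_example(int nr)
{
	/* spin until the boot CPU publishes the start flag in our paca */
	while (paca[nr].xProcStart == 0)
		barrier();
	/*
	 * The mb() issued in smp_kick_cpu() before setting xProcStart
	 * guarantees that the bringup data written earlier is visible
	 * to this CPU by the time it sees the flag.
	 */
}
#endif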

static void smp_space_timers( unsigned nr )
{
	unsigned long offset, i;

	offset = tb_ticks_per_jiffy / nr;
	for ( i=1; i<nr; ++i )
		paca[i].next_jiffy_update_tb =
			paca[0].next_jiffy_update_tb + (i * offset);
}

static void smp_chrp_setup_cpu(int cpu_nr)
{
	static atomic_t ready = ATOMIC_INIT(1);
	static volatile int frozen = 0;

	if (systemcfg->platform == PLATFORM_PSERIES_LPAR) {
		/* timebases already synced under the hypervisor. */
		paca[cpu_nr].next_jiffy_update_tb = tb_last_stamp = get_tb();
		if (cpu_nr == 0) {
			systemcfg->tb_orig_stamp = tb_last_stamp;
			/* Should update naca->stamp_xsec.
			 * For now we leave it which means the time can be some
			 * number of msecs off until someone does a settimeofday()
			 */
		}
		smp_tb_synchronized = 1;
	} else {
		if (cpu_nr == 0) {
			/* wait for all the others */
			while (atomic_read(&ready) < smp_num_cpus)
				barrier();
			atomic_set(&ready, 1);
			/* freeze the timebase */
			rtas_call(rtas_token("freeze-time-base"), 0, 1, NULL);
			mb();
			frozen = 1;
			set_tb(0, 0);
			paca[0].next_jiffy_update_tb = 0;
			smp_space_timers(smp_num_cpus);
			while (atomic_read(&ready) < smp_num_cpus)
				barrier();
			/* thaw the timebase again */
			rtas_call(rtas_token("thaw-time-base"), 0, 1, NULL);
			mb();
			frozen = 0;
			tb_last_stamp = get_tb();
			systemcfg->tb_orig_stamp = tb_last_stamp;
			smp_tb_synchronized = 1;
		} else {
			atomic_inc(&ready);
			while (!frozen)
				barrier();
			set_tb(0, 0);
			mb();
			atomic_inc(&ready);
			while (frozen)
				barrier();
		}
	}

	if (OpenPIC_Addr) {
		do_openpic_setup_cpu();
	} else {
		if (cpu_nr > 0)
			xics_setup_cpu();
	}
}

static void smp_xics_message_pass(int target, int msg, unsigned long data, int wait)
{
	int i;

	for (i = 0; i < smp_num_cpus; ++i) {
		if (target == MSG_ALL || target == i
		    || (target == MSG_ALL_BUT_SELF
			&& i != smp_processor_id())) {
			set_bit(msg, &xics_ipi_message[i]);
			mb();
			xics_cause_IPI(i);
		}
	}
}

static int smp_xics_probe(void)
{
	return systemcfg->processorCount;
}

/* This is called very early */
void smp_init_pSeries(void)
{
	if (naca->interrupt_controller == IC_OPEN_PIC) {
		ppc_md.smp_message_pass = smp_openpic_message_pass;
		ppc_md.smp_probe        = smp_chrp_probe;
		ppc_md.smp_kick_cpu     = smp_kick_cpu;
		ppc_md.smp_setup_cpu    = smp_chrp_setup_cpu;
	} else {
		ppc_md.smp_message_pass = smp_xics_message_pass;
		ppc_md.smp_probe        = smp_xics_probe;
		ppc_md.smp_kick_cpu     = smp_kick_cpu;
		ppc_md.smp_setup_cpu    = smp_chrp_setup_cpu;
	}
}

void smp_local_timer_interrupt(struct pt_regs * regs)
{
	if (!--(get_paca()->prof_counter)) {
		update_process_times(user_mode(regs));
		(get_paca()->prof_counter) = get_paca()->prof_multiplier;
	}
}

void smp_message_recv(int msg, struct pt_regs *regs)
{
	atomic_inc(&ipi_recv);

	switch( msg ) {
	case PPC_MSG_CALL_FUNCTION:
#ifdef CONFIG_KDB
		kdb_smp_regs[smp_processor_id()] = regs;
#endif
		smp_call_function_interrupt();
		break;
	case PPC_MSG_RESCHEDULE:
		current->need_resched = 1;
		break;
#ifdef CONFIG_XMON
	case PPC_MSG_XMON_BREAK:
		/* ToDo: need a nmi way to handle this.  Soft disable? */
#if defined(CONFIG_DUMP) || defined(CONFIG_DUMP_MODULE)
		if (dump_ipi_function_ptr) {
			printk(KERN_ALERT "got dump ipi...\n");
			dump_ipi_function_ptr(regs);
		} else
#endif
			xmon(regs);
		break;
#endif /* CONFIG_XMON */
	default:
		printk("SMP %d: smp_message_recv(): unknown msg %d\n",
		       smp_processor_id(), msg);
		break;
	}
}

void smp_send_reschedule(int cpu)
{
	if ((systemcfg->platform & PLATFORM_LPAR) &&
	    (paca[cpu].yielded == 1)) {
#ifdef CONFIG_PPC_ISERIES
		HvCall_sendLpProd(cpu);
#else
		prod_processor(cpu);
#endif
	} else {
		smp_message_pass(cpu, PPC_MSG_RESCHEDULE, 0, 0);
	}
}

#ifdef CONFIG_XMON
void smp_send_xmon_break(int cpu)
{
	smp_message_pass(cpu, PPC_MSG_XMON_BREAK, 0, 0);
}
#endif /* CONFIG_XMON */

#if defined(CONFIG_DUMP) || defined(CONFIG_DUMP_MODULE)
void dump_send_ipi(int (*dump_ipi_callback)(struct pt_regs *))
{
	dump_ipi_function_ptr = dump_ipi_callback;
	if (dump_ipi_callback) {
		printk(KERN_ALERT "dump_send_ipi...\n");
		mb();
		smp_message_pass(MSG_ALL_BUT_SELF, PPC_MSG_XMON_BREAK, 0, 0);
	}
}
#endif

static void stop_this_cpu(void *dummy)
{
	__cli();
	while (1)
		;
}

void smp_send_stop(void)
{
	smp_call_function(stop_this_cpu, NULL, 1, 0);
	smp_num_cpus = 1;
}

/*
 * Structure and data for smp_call_function().  This is designed to minimise
 * static memory requirements.  It also looks cleaner.
 * Stolen from the i386 version.
 */
static spinlock_t call_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;

static struct call_data_struct {
	void (*func) (void *info);
	void *info;
	atomic_t started;
	atomic_t finished;
	int wait;
} *call_data;

/*
 * This function sends a 'generic call function' IPI to all other CPUs
 * in the system.
 *
 * [SUMMARY] Run a function on all other CPUs.
 * <func> The function to run. This must be fast and non-blocking.
 * <info> An arbitrary pointer to pass to the function.
 * <nonatomic> currently unused.
 * <wait> If true, wait (atomically) until function has completed on other CPUs.
 * [RETURNS] 0 on success, else a negative status code. Does not return until
 * remote CPUs are nearly ready to execute <func> or are or have executed it.
 *
 * You must not call this function with disabled interrupts or from a
 * hardware interrupt handler or from a bottom half handler.
 */
int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
		       int wait)
{
	struct call_data_struct data;
	int ret = -1, cpus = smp_num_cpus-1;
	int timeout;

	if (!cpus)
		return 0;

	data.func = func;
	data.info = info;
	atomic_set(&data.started, 0);
	data.wait = wait;
	if (wait)
		atomic_set(&data.finished, 0);

	spin_lock_bh(&call_lock);
	call_data = &data;
	/* Send a message to all other CPUs and wait for them to respond */
	smp_message_pass(MSG_ALL_BUT_SELF, PPC_MSG_CALL_FUNCTION, 0, 0);

	/* Wait for response */
	timeout = 8000000;
	while (atomic_read(&data.started) != cpus) {
		HMT_low();
		if (--timeout == 0) {
			printk("smp_call_function on cpu %d: other cpus not responding (%d)\n",
			       smp_processor_id(), atomic_read(&data.started));
#ifdef CONFIG_XMON
			xmon(0);
#endif
#ifdef CONFIG_KDB
			kdb(KDB_REASON_CALL, 0, (kdb_eframe_t) 0);
#endif
#ifdef CONFIG_PPC_ISERIES
			HvCall_terminateMachineSrc();
#endif
			goto out;
		}
		barrier();
		udelay(1);
	}

	if (wait) {
		timeout = 1000000;
		while (atomic_read(&data.finished) != cpus) {
			HMT_low();
			if (--timeout == 0) {
				printk("smp_call_function on cpu %d: other cpus not finishing (%d/%d)\n",
				       smp_processor_id(),
				       atomic_read(&data.finished),
				       atomic_read(&data.started));
#ifdef CONFIG_PPC_ISERIES
				HvCall_terminateMachineSrc();
#endif
				goto out;
			}
			barrier();
			udelay(1);
		}
	}

	ret = 0;

 out:
	call_data = NULL;
	HMT_medium();
	spin_unlock_bh(&call_lock);
	return ret;
}

void smp_call_function_interrupt(void)
{
	void (*func) (void *info);
	void *info;
	int wait;

	/* call_data will be NULL if the sender timed out while
	 * waiting on us to receive the call.
	 */
	if (!call_data)
		return;

	func = call_data->func;
	info = call_data->info;
	wait = call_data->wait;

	/*
	 * Notify initiating CPU that I've grabbed the data and am
	 * about to execute the function
	 */
	atomic_inc(&call_data->started);
	/*
	 * At this point the info structure may be out of scope unless wait==1
	 */
	(*func)(info);
	if (wait)
		atomic_inc(&call_data->finished);
}
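
/*
 * Illustrative sketch only, not part of the original file: a minimal caller
 * of smp_call_function().  The names example_mark_cpu() and example_broadcast()
 * are hypothetical.  The callback must be fast and non-blocking, and the call
 * must be made with interrupts enabled, outside interrupt context.
 */
#if 0
static void example_mark_cpu(void *info)
{
	/* runs on every other CPU; 'info' is the pointer passed below */
	volatile unsigned long *mask = (volatile unsigned long *)info;

	set_bit(smp_processor_id(), mask);
}

static void example_broadcast(void)
{
	volatile unsigned long responded = 0;

	/* nonatomic is currently unused; wait=1 blocks until all CPUs ran it */
	if (smp_call_function(example_mark_cpu, (void *)&responded, 0, 1) != 0)
		printk("example_broadcast: some cpus did not respond\n");
}
#endif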

extern unsigned long decr_overclock;

void __init smp_boot_cpus(void)
{
	extern struct current_set_struct current_set[];
	extern void __secondary_start_chrp(void);
	int i, cpu_nr;
	struct task_struct *p;
	unsigned long sp;

	printk("Entering SMP Mode...\n");
	PPCDBG(PPCDBG_SMP, "smp_boot_cpus: start.  NR_CPUS = 0x%lx\n", NR_CPUS);

	smp_num_cpus = 1;
	smp_store_cpu_info(0);
	cpu_online_map = 1UL;

	/*
	 * assume for now that the first cpu booted is
	 * cpu 0, the master -- Cort
	 */
	cpu_callin_map[0] = 1;
	current->processor = 0;

	init_idle();

	for (i = 0; i < NR_CPUS; i++) {
		paca[i].prof_counter = 1;
		paca[i].prof_multiplier = 1;
		if (i != 0) {
			/*
			 * Processor 0's segment table is statically
			 * initialized to real address STAB0_PHYS_ADDR.  The
			 * other processors' tables are created and
			 * initialized here.
			 */
			paca[i].xStab_data.virt =
				(unsigned long)&stab_array[PAGE_SIZE * (i-1)];
			memset((void *)paca[i].xStab_data.virt, 0, PAGE_SIZE);
			paca[i].xStab_data.real = __v2a(paca[i].xStab_data.virt);
			paca[i].default_decr = tb_ticks_per_jiffy / decr_overclock;
		}
	}

	/*
	 * XXX very rough, assumes 20 bus cycles to read a cache line,
	 * timebase increments every 4 bus cycles, 32kB L1 data cache.
	 */
	cacheflush_time = 5 * 1024;

	/* Probe arch for CPUs */
	cpu_nr = ppc_md.smp_probe();

	printk("Probe found %d CPUs\n", cpu_nr);

	/*
	 * only check for cpus we know exist.  We keep the callin map
	 * with cpus at the bottom -- Cort
	 */
	if (cpu_nr > max_cpus)
		cpu_nr = max_cpus;

#ifdef CONFIG_PPC_ISERIES
	smp_space_timers( cpu_nr );
#endif

	printk("Waiting for %d CPUs\n", cpu_nr-1);

	for ( i = 1 ; i < cpu_nr; i++ ) {
		int c;
		struct pt_regs regs;

		/* create a process for the processor */
		/* we don't care about the values in regs since we'll
		   never reschedule the forked task. */
		/* We DO care about one bit in the pt_regs we
		   pass to do_fork.  That is the MSR_FP bit in
		   regs.msr.  If that bit is on, then do_fork
		   (via copy_thread) will call giveup_fpu.
		   giveup_fpu will get a pointer to our (current's)
		   last register savearea via current->thread.regs
		   and using that pointer will turn off the MSR_FP,
		   MSR_FE0 and MSR_FE1 bits.  At this point, this
		   pointer is pointing to some arbitrary point within
		   our stack. */
		memset(&regs, 0, sizeof(struct pt_regs));

		if (do_fork(CLONE_VM|CLONE_PID, 0, &regs, 0) < 0)
			panic("failed fork for CPU %d", i);
		p = init_task.prev_task;
		if (!p)
			panic("No idle task for CPU %d", i);

		PPCDBG(PPCDBG_SMP, "\tProcessor %d, task = 0x%lx\n", i, p);

		del_from_runqueue(p);
		unhash_process(p);
		init_tasks[i] = p;

		p->processor = i;
		p->cpus_runnable = 1 << i; /* we schedule the first task manually */
		current_set[i].task = p;
		sp = ((unsigned long)p) + sizeof(union task_union)
			- STACK_FRAME_OVERHEAD;
		current_set[i].sp_real = (void *)__v2a(sp);

		/* wake up cpus */
		ppc_md.smp_kick_cpu(i);

		/*
		 * wait to see if the cpu made a callin (is actually up).
		 * use this value that I found through experimentation.
		 * -- Cort
		 */
		for ( c = 5000; c && !cpu_callin_map[i] ; c-- ) {
			udelay(100);
		}

		if ( cpu_callin_map[i] ) {
			printk("Processor %d found.\n", i);
			PPCDBG(PPCDBG_SMP, "\tProcessor %d found.\n", i);
			/* this sync's the decr's -- Cort */
			smp_num_cpus++;
		} else {
			printk("Processor %d is stuck.\n", i);
			PPCDBG(PPCDBG_SMP, "\tProcessor %d is stuck.\n", i);
		}
	}

	/* Setup CPU 0 last (important) */
	ppc_md.smp_setup_cpu(0);

	if (smp_num_cpus < 2) {
		tb_last_stamp = get_tb();
		smp_tb_synchronized = 1;
	}
}

void __init smp_commence(void)
{
	/*
	 * Lets the callins below out of their loop.
	 */
	PPCDBG(PPCDBG_SMP, "smp_commence: start\n");
	wmb();
	smp_commenced = 1;
}

void __init smp_callin(void)
{
	int cpu = current->processor;

	smp_store_cpu_info(cpu);
	set_dec(paca[cpu].default_decr);
	cpu_callin_map[cpu] = 1;

	ppc_md.smp_setup_cpu(cpu);

	init_idle();

	set_bit(smp_processor_id(), &cpu_online_map);

	while (!smp_commenced) {
		barrier();
	}
	__sti();
}

/* intel needs this */
void __init initialize_secondary(void)
{
}

/* Activate a secondary processor. */
int start_secondary(void *unused)
{
	int cpu;

	cpu = current->processor;
	atomic_inc(&init_mm.mm_count);
	current->active_mm = &init_mm;
	smp_callin();

	get_paca()->yielded = 0;

	if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) {
		vpa_init(cpu);
	}

	/* Go into the idle loop. */
	return cpu_idle(NULL);
}

void __init smp_setup(char *str, int *ints)
{
}

int setup_profiling_timer(unsigned int multiplier)
{
	return 0;
}

/* this function is called for each processor */
void __init smp_store_cpu_info(int id)
{
	paca[id].pvr = _get_PVR();
}

static int __init maxcpus(char *str)
{
	get_option(&str, &max_cpus);
	return 1;
}

__setup("maxcpus=", maxcpus);
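
/*
 * Illustrative note, not from the original file: booting with, for example,
 * "maxcpus=2" on the kernel command line makes maxcpus() above store 2 in
 * max_cpus, and smp_boot_cpus() then clamps the probed CPU count to that
 * value before any secondary processor is kicked.
 */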