#include <asm/timer.h>
#include <asm/starfire.h>
#include <asm/tlb.h>
+#include <asm/sections.h>
extern void calibrate_delay(void);
cpu_data(id).clock_tick = prom_getintdefault(cpu_node,
"clock-frequency", 0);
- cpu_data(id).pgcache_size = 0;
- cpu_data(id).pte_cache[0] = NULL;
- cpu_data(id).pte_cache[1] = NULL;
- cpu_data(id).pgd_cache = NULL;
cpu_data(id).idle_volume = 1;
cpu_data(id).dcache_size = prom_getintdefault(cpu_node, "dcache-size",
static volatile unsigned long callin_flag = 0;
-extern void inherit_locked_prom_mappings(int save_p);
-
-static inline void cpu_setup_percpu_base(unsigned long cpu_id)
-{
- __asm__ __volatile__("mov %0, %%g5\n\t"
- "stxa %0, [%1] %2\n\t"
- "membar #Sync"
- : /* no outputs */
- : "r" (__per_cpu_offset(cpu_id)),
- "r" (TSB_REG), "i" (ASI_IMMU));
-}
-
void __init smp_callin(void)
{
int cpuid = hard_smp_processor_id();
- inherit_locked_prom_mappings(0);
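+ /* Set up this cpu's per-cpu area offset before any per-cpu data is used. */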
+ __local_per_cpu_offset = __per_cpu_offset(cpuid);
- __flush_tlb_all();
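+ /* Each sun4v cpu registers the kernel TSB with the hypervisor so
+ * kernel TLB misses can be serviced from it.
+ */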
+ if (tlb_type == hypervisor)
+ sun4v_ktsb_register();
- cpu_setup_percpu_base(cpuid);
+ __flush_tlb_all();
smp_setup_percpu_timer();
p = fork_idle(cpu);
callin_flag = 0;
- cpu_new_thread = p->thread_info;
+ cpu_new_thread = task_thread_info(p);
cpu_set(cpu, cpu_callout_map);
cpu_find_by_mid(cpu, &cpu_node);
static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
{
u64 pstate, ver;
- int nack_busy_id, is_jalapeno;
+ int nack_busy_id, is_jbus;
if (cpus_empty(mask))
return;
* derivative processor.
*/
__asm__ ("rdpr %%ver, %0" : "=r" (ver));
- is_jalapeno = ((ver >> 32) == 0x003e0016);
+ is_jbus = ((ver >> 32) == __JALAPENO_ID ||
+ (ver >> 32) == __SERRANO_ID);
__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
for_each_cpu_mask(i, mask) {
u64 target = (i << 14) | 0x70;
- if (!is_jalapeno)
+ if (!is_jbus)
target |= (nack_busy_id << 24);
__asm__ __volatile__(
"stxa %%g0, [%0] %1\n\t"
for_each_cpu_mask(i, mask) {
u64 check_mask;
- if (is_jalapeno)
+ if (is_jbus)
check_mask = (0x2UL << (2*i));
else
check_mask = (0x2UL <<
}
}
+#if 0
+/* Multi-cpu list version. */
+static int init_cpu_list(u16 *list, cpumask_t mask)
+{
+ int i, cnt;
+
+ cnt = 0;
+ for_each_cpu_mask(i, mask)
+ list[cnt++] = i;
+
+ return cnt;
+}
+
+static int update_cpu_list(u16 *list, int orig_cnt, cpumask_t mask)
+{
+ int i;
+
+ for (i = 0; i < orig_cnt; i++) {
+ if (list[i] == 0xffff)
+ cpu_clear(i, mask);
+ }
+
+ return init_cpu_list(list, mask);
+}
+
+static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
+{
+ int this_cpu = get_cpu();
+ struct trap_per_cpu *tb = &trap_block[this_cpu];
+ u64 *mondo = __va(tb->cpu_mondo_block_pa);
+ u16 *cpu_list = __va(tb->cpu_list_pa);
+ int cnt, retries;
+
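+ /* The hypervisor reads the mondo payload through cpu_mondo_block_pa,
+ * so these stores must be globally visible before the trap below.
+ */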
+ mondo[0] = data0;
+ mondo[1] = data1;
+ mondo[2] = data2;
+ wmb();
+
+ retries = 0;
+ cnt = init_cpu_list(cpu_list, mask);
+ do {
+ register unsigned long func __asm__("%o5");
+ register unsigned long arg0 __asm__("%o0");
+ register unsigned long arg1 __asm__("%o1");
+ register unsigned long arg2 __asm__("%o2");
+
+ func = HV_FAST_CPU_MONDO_SEND;
+ arg0 = cnt;
+ arg1 = tb->cpu_list_pa;
+ arg2 = tb->cpu_mondo_block_pa;
+
+ __asm__ __volatile__("ta %8"
+ : "=&r" (func), "=&r" (arg0),
+ "=&r" (arg1), "=&r" (arg2)
+ : "0" (func), "1" (arg0),
+ "2" (arg1), "3" (arg2),
+ "i" (HV_FAST_TRAP)
+ : "memory");
+ if (likely(arg0 == HV_EOK))
+ break;
+
+ if (unlikely(++retries > 100)) {
+ printk("CPU[%d]: sun4v mondo error %lu\n",
+ this_cpu, func);
+ break;
+ }
+
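+ /* List entries the hypervisor has already delivered to are marked
+ * 0xffff; drop them and retry the remainder after a short delay.
+ */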
+ cnt = update_cpu_list(cpu_list, cnt, mask);
+
+ udelay(2 * cnt);
+ } while (1);
+
+ put_cpu();
+}
+#else
+/* Single-cpu list version. */
+static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
+{
+ int this_cpu = get_cpu();
+ struct trap_per_cpu *tb = &trap_block[this_cpu];
+ u64 *mondo = __va(tb->cpu_mondo_block_pa);
+ u16 *cpu_list = __va(tb->cpu_list_pa);
+ int i;
+
+ mondo[0] = data0;
+ mondo[1] = data1;
+ mondo[2] = data2;
+ wmb();
+
+ for_each_cpu_mask(i, mask) {
+ int retries = 0;
+
+ do {
+ register unsigned long func __asm__("%o5");
+ register unsigned long arg0 __asm__("%o0");
+ register unsigned long arg1 __asm__("%o1");
+ register unsigned long arg2 __asm__("%o2");
+
+ cpu_list[0] = i;
+ func = HV_FAST_CPU_MONDO_SEND;
+ arg0 = 1;
+ arg1 = tb->cpu_list_pa;
+ arg2 = tb->cpu_mondo_block_pa;
+
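+ /* sun4v fast trap convention: function number in %o5, arguments
+ * in %o0-%o2, completion status returned in %o0.
+ */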
+ __asm__ __volatile__("ta %8"
+ : "=&r" (func), "=&r" (arg0),
+ "=&r" (arg1), "=&r" (arg2)
+ : "0" (func), "1" (arg0),
+ "2" (arg1), "3" (arg2),
+ "i" (HV_FAST_TRAP)
+ : "memory");
+ if (likely(arg0 == HV_EOK))
+ break;
+
+ if (unlikely(++retries > 100)) {
+ printk("CPU[%d]: sun4v mondo error %lu\n",
+ this_cpu, func);
+ break;
+ }
+
+ udelay(2 * i);
+ } while (1);
+ }
+
+ put_cpu();
+}
+#endif
+
/* Send cross call to all processors mentioned in MASK
* except self.
*/
if (tlb_type == spitfire)
spitfire_xcall_deliver(data0, data1, data2, mask);
- else
+ else if (tlb_type == cheetah || tlb_type == cheetah_plus)
cheetah_xcall_deliver(data0, data1, data2, mask);
+ else
+ hypervisor_xcall_deliver(data0, data1, data2, mask);
/* NOTE: Caller runs local copy on master. */
put_cpu();
* You must not call this function with disabled interrupts or from a
* hardware interrupt handler or from a bottom half handler.
*/
-int smp_call_function(void (*func)(void *info), void *info,
- int nonatomic, int wait)
+static int smp_call_function_mask(void (*func)(void *info), void *info,
+ int nonatomic, int wait, cpumask_t mask)
{
struct call_data_struct data;
- int cpus = num_online_cpus() - 1;
+ int cpus = cpus_weight(mask) - 1;
long timeout;
if (!cpus)
call_data = &data;
- smp_cross_call(&xcall_call_function, 0, 0, 0);
+ smp_cross_call_masked(&xcall_call_function, 0, 0, 0, mask);
/*
* Wait for other cpus to complete function or at
return 0;
}
+int smp_call_function(void (*func)(void *info), void *info,
+ int nonatomic, int wait)
+{
+ return smp_call_function_mask(func, info, nonatomic, wait,
+ cpu_online_map);
+}
+
void smp_call_function_client(int irq, struct pt_regs *regs)
{
void (*func) (void *info) = call_data->func;
}
}
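+/* Force every cpu currently running "mm" to reload its TSB state,
+ * e.g. after the mm's TSB has been resized.
+ */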
+static void tsb_sync(void *info)
+{
+ struct mm_struct *mm = info;
+
+ if (current->active_mm == mm)
+ tsb_context_switch(mm);
+}
+
+void smp_tsb_sync(struct mm_struct *mm)
+{
+ smp_call_function_mask(tsb_sync, mm, 0, 1, mm->cpu_vm_mask);
+}
+
extern unsigned long xcall_flush_tlb_mm;
extern unsigned long xcall_flush_tlb_pending;
extern unsigned long xcall_flush_tlb_kernel_range;
-extern unsigned long xcall_flush_tlb_all_spitfire;
-extern unsigned long xcall_flush_tlb_all_cheetah;
extern unsigned long xcall_report_regs;
extern unsigned long xcall_receive_signal;
void smp_flush_dcache_page_impl(struct page *page, int cpu)
{
cpumask_t mask = cpumask_of_cpu(cpu);
- int this_cpu = get_cpu();
+ int this_cpu;
+
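+ /* sun4v chips do not need the D-cache alias flushing done here. */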
+ if (tlb_type == hypervisor)
+ return;
#ifdef CONFIG_DEBUG_DCFLUSH
atomic_inc(&dcpage_flushes);
#endif
+
+ this_cpu = get_cpu();
+
if (cpu == this_cpu) {
__local_flush_dcache_page(page);
} else if (cpu_online(cpu)) {
__pa(pg_addr),
(u64) pg_addr,
mask);
- } else {
+ } else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
#ifdef DCACHE_ALIASING_POSSIBLE
data0 =
((u64)&xcall_flush_dcache_page_cheetah);
void *pg_addr = page_address(page);
cpumask_t mask = cpu_online_map;
u64 data0;
- int this_cpu = get_cpu();
+ int this_cpu;
+
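+ /* No D-cache flushing is needed on sun4v. */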
+ if (tlb_type == hypervisor)
+ return;
+
+ this_cpu = get_cpu();
cpu_clear(this_cpu, mask);
__pa(pg_addr),
(u64) pg_addr,
mask);
- } else {
+ } else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
#ifdef DCACHE_ALIASING_POSSIBLE
data0 = ((u64)&xcall_flush_dcache_page_cheetah);
cheetah_xcall_deliver(data0,
if (tlb_type == spitfire)
spitfire_xcall_deliver(data0, 0, 0, mask);
- else
+ else if (tlb_type == cheetah || tlb_type == cheetah_plus)
cheetah_xcall_deliver(data0, 0, 0, mask);
+ else if (tlb_type == hypervisor)
+ hypervisor_xcall_deliver(data0, 0, 0, mask);
}
}
smp_cross_call(&xcall_report_regs, 0, 0, 0);
}
-void smp_flush_tlb_all(void)
-{
- if (tlb_type == spitfire)
- smp_cross_call(&xcall_flush_tlb_all_spitfire, 0, 0, 0);
- else
- smp_cross_call(&xcall_flush_tlb_all_cheetah, 0, 0, 0);
- __flush_tlb_all();
-}
-
/* We know that the window frames of the user have been flushed
* to the stack before we get here because all callers of us
* are flush_tlb_*() routines, and these run after flush_cache_*()
* can service tlb flush xcalls...
*/
extern void prom_world(int);
-extern void save_alternate_globals(unsigned long *);
-extern void restore_alternate_globals(unsigned long *);
+
void smp_penguin_jailcell(int irq, struct pt_regs *regs)
{
- unsigned long global_save[24];
-
clear_softint(1 << irq);
preempt_disable();
__asm__ __volatile__("flushw");
- save_alternate_globals(global_save);
prom_world(1);
atomic_inc(&smp_capture_registry);
membar_storeload_storestore();
while (penguins_are_doing_time)
rmb();
- restore_alternate_globals(global_save);
atomic_dec(&smp_capture_registry);
prom_world(0);
return 0;
}
+/* Constrain the number of cpus to max_cpus. */
void __init smp_prepare_cpus(unsigned int max_cpus)
{
- int instance, mid;
-
- instance = 0;
- while (!cpu_find_by_instance(instance, NULL, &mid)) {
- if (mid < max_cpus)
- cpu_set(mid, phys_cpu_present_map);
- instance++;
- }
-
if (num_possible_cpus() > max_cpus) {
+ int instance, mid;
+
instance = 0;
while (!cpu_find_by_instance(instance, NULL, &mid)) {
if (mid != boot_cpu_id) {
smp_store_cpu_info(boot_cpu_id);
}
+/* Set this up early so that things like the scheduler can init
+ * properly. We use the same cpu mask for both the present and
+ * possible cpu maps.
+ */
+void __init smp_setup_cpu_possible_map(void)
+{
+ int instance, mid;
+
+ instance = 0;
+ while (!cpu_find_by_instance(instance, NULL, &mid)) {
+ if (mid < NR_CPUS)
+ cpu_set(mid, phys_cpu_present_map);
+ instance++;
+ }
+}
+
void __devinit smp_prepare_boot_cpu(void)
{
- if (hard_smp_processor_id() >= NR_CPUS) {
+ int cpu = hard_smp_processor_id();
+
+ if (cpu >= NR_CPUS) {
prom_printf("Serious problem, boot cpu id >= NR_CPUS\n");
prom_halt();
}
- current_thread_info()->cpu = hard_smp_processor_id();
+ current_thread_info()->cpu = cpu;
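+ /* The per-cpu base now lives in __local_per_cpu_offset rather than
+ * the IMMU TSB register, so load the boot cpu's value here.
+ */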
+ __local_per_cpu_offset = __per_cpu_offset(cpu);
cpu_set(smp_processor_id(), cpu_online_map);
cpu_set(smp_processor_id(), phys_cpu_present_map);
if (!cpu_isset(cpu, cpu_online_map)) {
ret = -ENODEV;
} else {
- smp_synchronize_one_tick(cpu);
+ /* On SUN4V, writes to %tick and %stick are
+ * not allowed.
+ */
+ if (tlb_type != hypervisor)
+ smp_synchronize_one_tick(cpu);
}
}
return ret;
{
unsigned long goal, size, i;
char *ptr;
- /* Created by linker magic */
- extern char __per_cpu_start[], __per_cpu_end[];
/* Copy section for each CPU (we discard the original) */
- goal = ALIGN(__per_cpu_end - __per_cpu_start, PAGE_SIZE);
-
+ goal = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES);
#ifdef CONFIG_MODULES
if (goal < PERCPU_ENOUGH_ROOM)
goal = PERCPU_ENOUGH_ROOM;
for (size = 1UL; size < goal; size <<= 1UL)
__per_cpu_shift++;
- /* Make sure the resulting __per_cpu_base value
- * will fit in the 43-bit sign extended IMMU
- * TSB register.
- */
- ptr = __alloc_bootmem(size * NR_CPUS, PAGE_SIZE,
- (unsigned long) __per_cpu_start);
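+ /* The per-cpu base no longer has to fit in the IMMU TSB register,
+ * so no special alignment or placement of the allocation is needed.
+ */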
+ ptr = alloc_bootmem(size * NR_CPUS);
__per_cpu_base = ptr - __per_cpu_start;
- if ((__per_cpu_shift < PAGE_SHIFT) ||
- (__per_cpu_base & ~PAGE_MASK) ||
- (__per_cpu_base != (((long) __per_cpu_base << 20) >> 20))) {
- prom_printf("PER_CPU: Invalid layout, "
- "ptr[%p] shift[%lx] base[%lx]\n",
- ptr, __per_cpu_shift, __per_cpu_base);
- prom_halt();
- }
-
for (i = 0; i < NR_CPUS; i++, ptr += size)
memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
-
- /* Finally, load in the boot cpu's base value.
- * We abuse the IMMU TSB register for trap handler
- * entry and exit loading of %g5. That is why it
- * has to be page aligned.
- */
- cpu_setup_percpu_base(hard_smp_processor_id());
}