drivers/hv/hv.c

   1 /*
   2  * Copyright (c) 2009, Microsoft Corporation.
   3  *
   4  * This program is free software; you can redistribute it and/or modify it
   5  * under the terms and conditions of the GNU General Public License,
   6  * version 2, as published by the Free Software Foundation.
   7  *
   8  * This program is distributed in the hope it will be useful, but WITHOUT
   9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  11  * more details.
  12  *
  13  * You should have received a copy of the GNU General Public License along with
  14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
  15  * Place - Suite 330, Boston, MA 02111-1307 USA.
  16  *
  17  * Authors:
  18  *   Haiyang Zhang <haiyangz@microsoft.com>
  19  *   Hank Janssen  <hjanssen@microsoft.com>
  20  *
  21  */
  22 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  23
  24 #include <linux/kernel.h>
  25 #include <linux/mm.h>
  26 #include <linux/slab.h>
  27 #include <linux/vmalloc.h>
  28 #include <linux/hyperv.h>
  29 #include <linux/version.h>
  30 #include <linux/interrupt.h>
  31 #include <linux/clockchips.h>
  32 #include <asm/hyperv.h>
  33 #include <asm/mshyperv.h>
  34 #include "hyperv_vmbus.h"
  35
  36 /* The one and only */
  37 struct hv_context hv_context = {
  38         .synic_initialized      = false,
  39         .hypercall_page         = NULL,
  40 };
  41
  42 #define HV_TIMER_FREQUENCY (10 * 1000 * 1000) /* 100ns period */
  43 #define HV_MAX_MAX_DELTA_TICKS 0xffffffff
  44 #define HV_MIN_DELTA_TICKS 1
  45
  46 /*
  47  * query_hypervisor_info - Get version info of the windows hypervisor
  48  */
  49 unsigned int host_info_eax;
  50 unsigned int host_info_ebx;
  51 unsigned int host_info_ecx;
  52 unsigned int host_info_edx;
  53
  54 static int query_hypervisor_info(void)
  55 {
  56         unsigned int eax;
  57         unsigned int ebx;
  58         unsigned int ecx;
  59         unsigned int edx;
  60         unsigned int max_leaf;
  61         unsigned int op;
  62
  63         /*
  64         * Its assumed that this is called after confirming that Viridian
  65         * is present. Query id and revision.
  66         */
  67         eax = 0;
  68         ebx = 0;
  69         ecx = 0;
  70         edx = 0;
  71         op = HVCPUID_VENDOR_MAXFUNCTION;
  72         cpuid(op, &eax, &ebx, &ecx, &edx);
  73
  74         max_leaf = eax;
  75
  76         if (max_leaf >= HVCPUID_VERSION) {
  77                 eax = 0;
  78                 ebx = 0;
  79                 ecx = 0;
  80                 edx = 0;
  81                 op = HVCPUID_VERSION;
  82                 cpuid(op, &eax, &ebx, &ecx, &edx);
  83                 host_info_eax = eax;
  84                 host_info_ebx = ebx;
  85                 host_info_ecx = ecx;
  86                 host_info_edx = edx;
  87         }
  88         return max_leaf;
  89 }
  90
  91 /*
  92  * hv_do_hypercall- Invoke the specified hypercall
  93  */
  94 u64 hv_do_hypercall(u64 control, void *input, void *output)
  95 {
  96         u64 input_address = (input) ? virt_to_phys(input) : 0;
  97         u64 output_address = (output) ? virt_to_phys(output) : 0;
  98         void *hypercall_page = hv_context.hypercall_page;
  99 #ifdef CONFIG_X86_64
 100         u64 hv_status = 0;
 101
 102         if (!hypercall_page)
 103                 return (u64)ULLONG_MAX;
 104
 105         __asm__ __volatile__("mov %0, %%r8" : : "r" (output_address) : "r8");
 106         __asm__ __volatile__("call *%3" : "=a" (hv_status) :
 107                              "c" (control), "d" (input_address),
 108                              "m" (hypercall_page));
 109
 110         return hv_status;
 111
 112 #else
 113
 114         u32 control_hi = control >> 32;
 115         u32 control_lo = control & 0xFFFFFFFF;
 116         u32 hv_status_hi = 1;
 117         u32 hv_status_lo = 1;
 118         u32 input_address_hi = input_address >> 32;
 119         u32 input_address_lo = input_address & 0xFFFFFFFF;
 120         u32 output_address_hi = output_address >> 32;
 121         u32 output_address_lo = output_address & 0xFFFFFFFF;
 122
 123         if (!hypercall_page)
 124                 return (u64)ULLONG_MAX;
 125
 126         __asm__ __volatile__ ("call *%8" : "=d"(hv_status_hi),
 127                               "=a"(hv_status_lo) : "d" (control_hi),
 128                               "a" (control_lo), "b" (input_address_hi),
 129                               "c" (input_address_lo), "D"(output_address_hi),
 130                               "S"(output_address_lo), "m" (hypercall_page));
 131
 132         return hv_status_lo | ((u64)hv_status_hi << 32);
 133 #endif /* !x86_64 */
 134 }
 135 EXPORT_SYMBOL_GPL(hv_do_hypercall);
 136
 137 #ifdef CONFIG_X86_64
 138 static u64 read_hv_clock_tsc(struct clocksource *arg)
 139 {
 140         u64 current_tick;
 141         struct ms_hyperv_tsc_page *tsc_pg = hv_context.tsc_page;
 142
 143         if (tsc_pg->tsc_sequence != 0) {
 144                 /*
 145                  * Use the tsc page to compute the value.
 146                  */
 147
 148                 while (1) {
 149                         u64 tmp;
 150                         u32 sequence = tsc_pg->tsc_sequence;
 151                         u64 cur_tsc;
 152                         u64 scale = tsc_pg->tsc_scale;
 153                         s64 offset = tsc_pg->tsc_offset;
 154
 155                         rdtscll(cur_tsc);
 156                         /* current_tick = ((cur_tsc *scale) >> 64) + offset */
 157                         asm("mulq %3"
 158                                 : "=d" (current_tick), "=a" (tmp)
 159                                 : "a" (cur_tsc), "r" (scale));
 160
 161                         current_tick += offset;
 162                         if (tsc_pg->tsc_sequence == sequence)
 163                                 return current_tick;
 164
 165                         if (tsc_pg->tsc_sequence != 0)
 166                                 continue;
 167                         /*
 168                          * Fallback using MSR method.
 169                          */
 170                         break;
 171                 }
 172         }
 173         rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
 174         return current_tick;
 175 }
 176
 177 static struct clocksource hyperv_cs_tsc = {
 178                 .name           = "hyperv_clocksource_tsc_page",
 179                 .rating         = 425,
 180                 .read           = read_hv_clock_tsc,
 181                 .mask           = CLOCKSOURCE_MASK(64),
 182                 .flags          = CLOCK_SOURCE_IS_CONTINUOUS,
 183 };
 184 #endif
 185
 186
 187 /*
 188  * hv_init - Main initialization routine.
 189  *
 190  * This routine must be called before any other routines in here are called
 191  */
 192 int hv_init(void)
 193 {
 194         int max_leaf;
 195         union hv_x64_msr_hypercall_contents hypercall_msr;
 196
 197         memset(hv_context.synic_event_page, 0, sizeof(void *) * NR_CPUS);
 198         memset(hv_context.synic_message_page, 0,
 199                sizeof(void *) * NR_CPUS);
 200         memset(hv_context.post_msg_page, 0,
 201                sizeof(void *) * NR_CPUS);
 202         memset(hv_context.vp_index, 0,
 203                sizeof(int) * NR_CPUS);
 204         memset(hv_context.event_dpc, 0,
 205                sizeof(void *) * NR_CPUS);
 206         memset(hv_context.msg_dpc, 0,
 207                sizeof(void *) * NR_CPUS);
 208         memset(hv_context.clk_evt, 0,
 209                sizeof(void *) * NR_CPUS);
 210
 211         max_leaf = query_hypervisor_info();
 212
 213
 214         /* See if the hypercall page is already set */
 215         hypercall_msr.as_uint64 = 0;
 216         rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
 217
 218         if (!hypercall_msr.enable)
 219                 return -ENOTSUPP;
 220
 221         hv_context.hypercall_page = hv_hypercall_pg;
 222
 223 #ifdef CONFIG_X86_64
 224         if (ms_hyperv.features & HV_X64_MSR_REFERENCE_TSC_AVAILABLE) {
 225                 union hv_x64_msr_hypercall_contents tsc_msr;
 226                 void *va_tsc;
 227
 228                 va_tsc = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL);
 229                 if (!va_tsc)
 230                         goto cleanup;
 231                 hv_context.tsc_page = va_tsc;
 232
 233                 rdmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64);
 234
 235                 tsc_msr.enable = 1;
 236                 tsc_msr.guest_physical_address = vmalloc_to_pfn(va_tsc);
 237
 238                 wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64);
 239                 clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100);
 240         }
 241 #endif
 242         return 0;
 243
 244 cleanup:
 245         return -ENOTSUPP;
 246 }
 247
 248 /*
 249  * hv_cleanup - Cleanup routine.
 250  *
 251  * This routine is called normally during driver unloading or exiting.
 252  */
 253 void hv_cleanup(bool crash)
 254 {
 255
 256 #ifdef CONFIG_X86_64
 257         union hv_x64_msr_hypercall_contents hypercall_msr;
 258         /*
 259          * Cleanup the TSC page based CS.
 260          */
 261         if (ms_hyperv.features & HV_X64_MSR_REFERENCE_TSC_AVAILABLE) {
 262                 /*
 263                  * Crash can happen in an interrupt context and unregistering
 264                  * a clocksource is impossible and redundant in this case.
 265                  */
 266                 if (!oops_in_progress) {
 267                         clocksource_change_rating(&hyperv_cs_tsc, 10);
 268                         clocksource_unregister(&hyperv_cs_tsc);
 269                 }
 270
 271                 hypercall_msr.as_uint64 = 0;
 272                 wrmsrl(HV_X64_MSR_REFERENCE_TSC, hypercall_msr.as_uint64);
 273                 if (!crash) {
 274                         vfree(hv_context.tsc_page);
 275                         hv_context.tsc_page = NULL;
 276                 }
 277         }
 278 #endif
 279 }
 280
 281 /*
 282  * hv_post_message - Post a message using the hypervisor message IPC.
 283  *
 284  * This involves a hypercall.
 285  */
 286 int hv_post_message(union hv_connection_id connection_id,
 287                   enum hv_message_type message_type,
 288                   void *payload, size_t payload_size)
 289 {
 290
 291         struct hv_input_post_message *aligned_msg;
 292         u64 status;
 293
 294         if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT)
 295                 return -EMSGSIZE;
 296
 297         aligned_msg = (struct hv_input_post_message *)
 298                         hv_context.post_msg_page[get_cpu()];
 299
 300         aligned_msg->connectionid = connection_id;
 301         aligned_msg->reserved = 0;
 302         aligned_msg->message_type = message_type;
 303         aligned_msg->payload_size = payload_size;
 304         memcpy((void *)aligned_msg->payload, payload, payload_size);
 305
 306         status = hv_do_hypercall(HVCALL_POST_MESSAGE, aligned_msg, NULL);
 307
 308         put_cpu();
 309         return status & 0xFFFF;
 310 }
 311
 312 static int hv_ce_set_next_event(unsigned long delta,
 313                                 struct clock_event_device *evt)
 314 {
 315         u64 current_tick;
 316
 317         WARN_ON(!clockevent_state_oneshot(evt));
 318
 319         rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
 320         current_tick += delta;
 321         wrmsrl(HV_X64_MSR_STIMER0_COUNT, current_tick);
 322         return 0;
 323 }
 324
 325 static int hv_ce_shutdown(struct clock_event_device *evt)
 326 {
 327         wrmsrl(HV_X64_MSR_STIMER0_COUNT, 0);
 328         wrmsrl(HV_X64_MSR_STIMER0_CONFIG, 0);
 329
 330         return 0;
 331 }
 332
 333 static int hv_ce_set_oneshot(struct clock_event_device *evt)
 334 {
 335         union hv_timer_config timer_cfg;
 336
 337         timer_cfg.enable = 1;
 338         timer_cfg.auto_enable = 1;
 339         timer_cfg.sintx = VMBUS_MESSAGE_SINT;
 340         wrmsrl(HV_X64_MSR_STIMER0_CONFIG, timer_cfg.as_uint64);
 341
 342         return 0;
 343 }
 344
 345 static void hv_init_clockevent_device(struct clock_event_device *dev, int cpu)
 346 {
 347         dev->name = "Hyper-V clockevent";
 348         dev->features = CLOCK_EVT_FEAT_ONESHOT;
 349         dev->cpumask = cpumask_of(cpu);
 350         dev->rating = 1000;
 351         /*
 352          * Avoid settint dev->owner = THIS_MODULE deliberately as doing so will
 353          * result in clockevents_config_and_register() taking additional
 354          * references to the hv_vmbus module making it impossible to unload.
 355          */
 356
 357         dev->set_state_shutdown = hv_ce_shutdown;
 358         dev->set_state_oneshot = hv_ce_set_oneshot;
 359         dev->set_next_event = hv_ce_set_next_event;
 360 }
 361
 362
 363 int hv_synic_alloc(void)
 364 {
 365         size_t size = sizeof(struct tasklet_struct);
 366         size_t ced_size = sizeof(struct clock_event_device);
 367         int cpu;
 368
 369         hv_context.hv_numa_map = kzalloc(sizeof(struct cpumask) * nr_node_ids,
 370                                          GFP_ATOMIC);
 371         if (hv_context.hv_numa_map == NULL) {
 372                 pr_err("Unable to allocate NUMA map\n");
 373                 goto err;
 374         }
 375
 376         for_each_present_cpu(cpu) {
 377                 hv_context.event_dpc[cpu] = kmalloc(size, GFP_ATOMIC);
 378                 if (hv_context.event_dpc[cpu] == NULL) {
 379                         pr_err("Unable to allocate event dpc\n");
 380                         goto err;
 381                 }
 382                 tasklet_init(hv_context.event_dpc[cpu], vmbus_on_event, cpu);
 383
 384                 hv_context.msg_dpc[cpu] = kmalloc(size, GFP_ATOMIC);
 385                 if (hv_context.msg_dpc[cpu] == NULL) {
 386                         pr_err("Unable to allocate event dpc\n");
 387                         goto err;
 388                 }
 389                 tasklet_init(hv_context.msg_dpc[cpu], vmbus_on_msg_dpc, cpu);
 390
 391                 hv_context.clk_evt[cpu] = kzalloc(ced_size, GFP_ATOMIC);
 392                 if (hv_context.clk_evt[cpu] == NULL) {
 393                         pr_err("Unable to allocate clock event device\n");
 394                         goto err;
 395                 }
 396
 397                 hv_init_clockevent_device(hv_context.clk_evt[cpu], cpu);
 398
 399                 hv_context.synic_message_page[cpu] =
 400                         (void *)get_zeroed_page(GFP_ATOMIC);
 401
 402                 if (hv_context.synic_message_page[cpu] == NULL) {
 403                         pr_err("Unable to allocate SYNIC message page\n");
 404                         goto err;
 405                 }
 406
 407                 hv_context.synic_event_page[cpu] =
 408                         (void *)get_zeroed_page(GFP_ATOMIC);
 409
 410                 if (hv_context.synic_event_page[cpu] == NULL) {
 411                         pr_err("Unable to allocate SYNIC event page\n");
 412                         goto err;
 413                 }
 414
 415                 hv_context.post_msg_page[cpu] =
 416                         (void *)get_zeroed_page(GFP_ATOMIC);
 417
 418                 if (hv_context.post_msg_page[cpu] == NULL) {
 419                         pr_err("Unable to allocate post msg page\n");
 420                         goto err;
 421                 }
 422
 423                 INIT_LIST_HEAD(&hv_context.percpu_list[cpu]);
 424         }
 425
 426         return 0;
 427 err:
 428         return -ENOMEM;
 429 }
 430
 431 static void hv_synic_free_cpu(int cpu)
 432 {
 433         kfree(hv_context.event_dpc[cpu]);
 434         kfree(hv_context.msg_dpc[cpu]);
 435         kfree(hv_context.clk_evt[cpu]);
 436         if (hv_context.synic_event_page[cpu])
 437                 free_page((unsigned long)hv_context.synic_event_page[cpu]);
 438         if (hv_context.synic_message_page[cpu])
 439                 free_page((unsigned long)hv_context.synic_message_page[cpu]);
 440         if (hv_context.post_msg_page[cpu])
 441                 free_page((unsigned long)hv_context.post_msg_page[cpu]);
 442 }
 443
 444 void hv_synic_free(void)
 445 {
 446         int cpu;
 447
 448         kfree(hv_context.hv_numa_map);
 449         for_each_present_cpu(cpu)
 450                 hv_synic_free_cpu(cpu);
 451 }
 452
 453 /*
 454  * hv_synic_init - Initialize the Synthethic Interrupt Controller.
 455  *
 456  * If it is already initialized by another entity (ie x2v shim), we need to
 457  * retrieve the initialized message and event pages.  Otherwise, we create and
 458  * initialize the message and event pages.
 459  */
 460 int hv_synic_init(unsigned int cpu)
 461 {
 462         u64 version;
 463         union hv_synic_simp simp;
 464         union hv_synic_siefp siefp;
 465         union hv_synic_sint shared_sint;
 466         union hv_synic_scontrol sctrl;
 467         u64 vp_index;
 468
 469         if (!hv_context.hypercall_page)
 470                 return -EFAULT;
 471
 472         /* Check the version */
 473         rdmsrl(HV_X64_MSR_SVERSION, version);
 474
 475         /* Setup the Synic's message page */
 476         rdmsrl(HV_X64_MSR_SIMP, simp.as_uint64);
 477         simp.simp_enabled = 1;
 478         simp.base_simp_gpa = virt_to_phys(hv_context.synic_message_page[cpu])
 479                 >> PAGE_SHIFT;
 480
 481         wrmsrl(HV_X64_MSR_SIMP, simp.as_uint64);
 482
 483         /* Setup the Synic's event page */
 484         rdmsrl(HV_X64_MSR_SIEFP, siefp.as_uint64);
 485         siefp.siefp_enabled = 1;
 486         siefp.base_siefp_gpa = virt_to_phys(hv_context.synic_event_page[cpu])
 487                 >> PAGE_SHIFT;
 488
 489         wrmsrl(HV_X64_MSR_SIEFP, siefp.as_uint64);
 490
 491         /* Setup the shared SINT. */
 492         rdmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
 493
 494         shared_sint.as_uint64 = 0;
 495         shared_sint.vector = HYPERVISOR_CALLBACK_VECTOR;
 496         shared_sint.masked = false;
 497         shared_sint.auto_eoi = true;
 498
 499         wrmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
 500
 501         /* Enable the global synic bit */
 502         rdmsrl(HV_X64_MSR_SCONTROL, sctrl.as_uint64);
 503         sctrl.enable = 1;
 504
 505         wrmsrl(HV_X64_MSR_SCONTROL, sctrl.as_uint64);
 506
 507         hv_context.synic_initialized = true;
 508
 509         /*
 510          * Setup the mapping between Hyper-V's notion
 511          * of cpuid and Linux' notion of cpuid.
 512          * This array will be indexed using Linux cpuid.
 513          */
 514         rdmsrl(HV_X64_MSR_VP_INDEX, vp_index);
 515         hv_context.vp_index[cpu] = (u32)vp_index;
 516
 517         /*
 518          * Register the per-cpu clockevent source.
 519          */
 520         if (ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE)
 521                 clockevents_config_and_register(hv_context.clk_evt[cpu],
 522                                                 HV_TIMER_FREQUENCY,
 523                                                 HV_MIN_DELTA_TICKS,
 524                                                 HV_MAX_MAX_DELTA_TICKS);
 525         return 0;
 526 }
 527
 528 /*
 529  * hv_synic_clockevents_cleanup - Cleanup clockevent devices
 530  */
 531 void hv_synic_clockevents_cleanup(void)
 532 {
 533         int cpu;
 534
 535         if (!(ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE))
 536                 return;
 537
 538         for_each_present_cpu(cpu)
 539                 clockevents_unbind_device(hv_context.clk_evt[cpu], cpu);
 540 }
 541
 542 /*
 543  * hv_synic_cleanup - Cleanup routine for hv_synic_init().
 544  */
 545 int hv_synic_cleanup(unsigned int cpu)
 546 {
 547         union hv_synic_sint shared_sint;
 548         union hv_synic_simp simp;
 549         union hv_synic_siefp siefp;
 550         union hv_synic_scontrol sctrl;
 551         struct vmbus_channel *channel, *sc;
 552         bool channel_found = false;
 553         unsigned long flags;
 554
 555         if (!hv_context.synic_initialized)
 556                 return -EFAULT;
 557
 558         /*
 559          * Search for channels which are bound to the CPU we're about to
 560          * cleanup. In case we find one and vmbus is still connected we need to
 561          * fail, this will effectively prevent CPU offlining. There is no way
 562          * we can re-bind channels to different CPUs for now.
 563          */
 564         mutex_lock(&vmbus_connection.channel_mutex);
 565         list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
 566                 if (channel->target_cpu == cpu) {
 567                         channel_found = true;
 568                         break;
 569                 }
 570                 spin_lock_irqsave(&channel->lock, flags);
 571                 list_for_each_entry(sc, &channel->sc_list, sc_list) {
 572                         if (sc->target_cpu == cpu) {
 573                                 channel_found = true;
 574                                 break;
 575                         }
 576                 }
 577                 spin_unlock_irqrestore(&channel->lock, flags);
 578                 if (channel_found)
 579                         break;
 580         }
 581         mutex_unlock(&vmbus_connection.channel_mutex);
 582
 583         if (channel_found && vmbus_connection.conn_state == CONNECTED)
 584                 return -EBUSY;
 585
 586         /* Turn off clockevent device */
 587         if (ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE) {
 588                 clockevents_unbind_device(hv_context.clk_evt[cpu], cpu);
 589                 hv_ce_shutdown(hv_context.clk_evt[cpu]);
 590         }
 591
 592         rdmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
 593
 594         shared_sint.masked = 1;
 595
 596         /* Need to correctly cleanup in the case of SMP!!! */
 597         /* Disable the interrupt */
 598         wrmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
 599
 600         rdmsrl(HV_X64_MSR_SIMP, simp.as_uint64);
 601         simp.simp_enabled = 0;
 602         simp.base_simp_gpa = 0;
 603
 604         wrmsrl(HV_X64_MSR_SIMP, simp.as_uint64);
 605
 606         rdmsrl(HV_X64_MSR_SIEFP, siefp.as_uint64);
 607         siefp.siefp_enabled = 0;
 608         siefp.base_siefp_gpa = 0;
 609
 610         wrmsrl(HV_X64_MSR_SIEFP, siefp.as_uint64);
 611
 612         /* Disable the global synic bit */
 613         rdmsrl(HV_X64_MSR_SCONTROL, sctrl.as_uint64);
 614         sctrl.enable = 0;
 615         wrmsrl(HV_X64_MSR_SCONTROL, sctrl.as_uint64);
 616
 617         return 0;
 618 }