drivers/hv/hv.c

   1 /*
   2  * Copyright (c) 2009, Microsoft Corporation.
   3  *
   4  * This program is free software; you can redistribute it and/or modify it
   5  * under the terms and conditions of the GNU General Public License,
   6  * version 2, as published by the Free Software Foundation.
   7  *
   8  * This program is distributed in the hope it will be useful, but WITHOUT
   9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  11  * more details.
  12  *
  13  * You should have received a copy of the GNU General Public License along with
  14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
  15  * Place - Suite 330, Boston, MA 02111-1307 USA.
  16  *
  17  * Authors:
  18  *   Haiyang Zhang <haiyangz@microsoft.com>
  19  *   Hank Janssen  <hjanssen@microsoft.com>
  20  *
  21  */
  22 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  23
  24 #include <linux/kernel.h>
  25 #include <linux/mm.h>
  26 #include <linux/slab.h>
  27 #include <linux/vmalloc.h>
  28 #include <linux/hyperv.h>
  29 #include <linux/version.h>
  30 #include <linux/interrupt.h>
  31 #include <linux/clockchips.h>
  32 #include <asm/hyperv.h>
  33 #include <asm/mshyperv.h>
  34 #include "hyperv_vmbus.h"
  35
/*
 * The one and only Hyper-V context. It starts out with the SynIC marked as
 * not initialized; hv_synic_init() sets the flag once a CPU has been set up.
 */
struct hv_context hv_context = {
        .synic_initialized      = false,
};

/*
 * Clockevent parameters handed to clockevents_config_and_register() in
 * hv_synic_init(): the timer frequency and the smallest/largest delta
 * (in timer ticks) that may be programmed.
 */
#define HV_TIMER_FREQUENCY (10 * 1000 * 1000) /* 100ns period */
#define HV_MAX_MAX_DELTA_TICKS 0xffffffff
#define HV_MIN_DELTA_TICKS 1
  44
  45 /*
  46  * query_hypervisor_info - Get version info of the windows hypervisor
  47  */
  48 unsigned int host_info_eax;
  49 unsigned int host_info_ebx;
  50 unsigned int host_info_ecx;
  51 unsigned int host_info_edx;
  52
  53 static int query_hypervisor_info(void)
  54 {
  55         unsigned int eax;
  56         unsigned int ebx;
  57         unsigned int ecx;
  58         unsigned int edx;
  59         unsigned int max_leaf;
  60         unsigned int op;
  61
  62         /*
  63         * Its assumed that this is called after confirming that Viridian
  64         * is present. Query id and revision.
  65         */
  66         eax = 0;
  67         ebx = 0;
  68         ecx = 0;
  69         edx = 0;
  70         op = HVCPUID_VENDOR_MAXFUNCTION;
  71         cpuid(op, &eax, &ebx, &ecx, &edx);
  72
  73         max_leaf = eax;
  74
  75         if (max_leaf >= HVCPUID_VERSION) {
  76                 eax = 0;
  77                 ebx = 0;
  78                 ecx = 0;
  79                 edx = 0;
  80                 op = HVCPUID_VERSION;
  81                 cpuid(op, &eax, &ebx, &ecx, &edx);
  82                 host_info_eax = eax;
  83                 host_info_ebx = ebx;
  84                 host_info_ecx = ecx;
  85                 host_info_edx = edx;
  86         }
  87         return max_leaf;
  88 }
  89
  90 /*
  91  * hv_init - Main initialization routine.
  92  *
  93  * This routine must be called before any other routines in here are called
  94  */
  95 int hv_init(void)
  96 {
  97         int max_leaf;
  98         union hv_x64_msr_hypercall_contents hypercall_msr;
  99
 100         memset(hv_context.synic_event_page, 0, sizeof(void *) * NR_CPUS);
 101         memset(hv_context.synic_message_page, 0,
 102                sizeof(void *) * NR_CPUS);
 103         memset(hv_context.post_msg_page, 0,
 104                sizeof(void *) * NR_CPUS);
 105         memset(hv_context.vp_index, 0,
 106                sizeof(int) * NR_CPUS);
 107         memset(hv_context.event_dpc, 0,
 108                sizeof(void *) * NR_CPUS);
 109         memset(hv_context.msg_dpc, 0,
 110                sizeof(void *) * NR_CPUS);
 111         memset(hv_context.clk_evt, 0,
 112                sizeof(void *) * NR_CPUS);
 113
 114         max_leaf = query_hypervisor_info();
 115
 116
 117         /* See if the hypercall page is already set */
 118         hypercall_msr.as_uint64 = 0;
 119         rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
 120
 121         if (!hypercall_msr.enable)
 122                 return -ENOTSUPP;
 123
 124         return 0;
 125 }
 126
/*
 * hv_cleanup - Cleanup routine.
 *
 * This routine is called normally during driver unloading or exiting.
 *
 * It is currently a no-op: the body is empty and @crash is not examined.
 */
void hv_cleanup(bool crash)
{

}
 136
 137 /*
 138  * hv_post_message - Post a message using the hypervisor message IPC.
 139  *
 140  * This involves a hypercall.
 141  */
 142 int hv_post_message(union hv_connection_id connection_id,
 143                   enum hv_message_type message_type,
 144                   void *payload, size_t payload_size)
 145 {
 146
 147         struct hv_input_post_message *aligned_msg;
 148         u64 status;
 149
 150         if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT)
 151                 return -EMSGSIZE;
 152
 153         aligned_msg = (struct hv_input_post_message *)
 154                         hv_context.post_msg_page[get_cpu()];
 155
 156         aligned_msg->connectionid = connection_id;
 157         aligned_msg->reserved = 0;
 158         aligned_msg->message_type = message_type;
 159         aligned_msg->payload_size = payload_size;
 160         memcpy((void *)aligned_msg->payload, payload, payload_size);
 161
 162         status = hv_do_hypercall(HVCALL_POST_MESSAGE, aligned_msg, NULL);
 163
 164         put_cpu();
 165         return status & 0xFFFF;
 166 }
 167
 168 static int hv_ce_set_next_event(unsigned long delta,
 169                                 struct clock_event_device *evt)
 170 {
 171         u64 current_tick;
 172
 173         WARN_ON(!clockevent_state_oneshot(evt));
 174
 175         rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
 176         current_tick += delta;
 177         wrmsrl(HV_X64_MSR_STIMER0_COUNT, current_tick);
 178         return 0;
 179 }
 180
/*
 * Clockevent ->set_state_shutdown() callback: stop stimer0 by clearing its
 * count and config MSRs. @evt is not examined. Always returns 0.
 */
static int hv_ce_shutdown(struct clock_event_device *evt)
{
	wrmsrl(HV_X64_MSR_STIMER0_COUNT, 0);
	wrmsrl(HV_X64_MSR_STIMER0_CONFIG, 0);

	return 0;
}
 188
 189 static int hv_ce_set_oneshot(struct clock_event_device *evt)
 190 {
 191         union hv_timer_config timer_cfg;
 192
 193         timer_cfg.enable = 1;
 194         timer_cfg.auto_enable = 1;
 195         timer_cfg.sintx = VMBUS_MESSAGE_SINT;
 196         wrmsrl(HV_X64_MSR_STIMER0_CONFIG, timer_cfg.as_uint64);
 197
 198         return 0;
 199 }
 200
 201 static void hv_init_clockevent_device(struct clock_event_device *dev, int cpu)
 202 {
 203         dev->name = "Hyper-V clockevent";
 204         dev->features = CLOCK_EVT_FEAT_ONESHOT;
 205         dev->cpumask = cpumask_of(cpu);
 206         dev->rating = 1000;
 207         /*
 208          * Avoid settint dev->owner = THIS_MODULE deliberately as doing so will
 209          * result in clockevents_config_and_register() taking additional
 210          * references to the hv_vmbus module making it impossible to unload.
 211          */
 212
 213         dev->set_state_shutdown = hv_ce_shutdown;
 214         dev->set_state_oneshot = hv_ce_set_oneshot;
 215         dev->set_next_event = hv_ce_set_next_event;
 216 }
 217
 218
 219 int hv_synic_alloc(void)
 220 {
 221         size_t size = sizeof(struct tasklet_struct);
 222         size_t ced_size = sizeof(struct clock_event_device);
 223         int cpu;
 224
 225         hv_context.hv_numa_map = kzalloc(sizeof(struct cpumask) * nr_node_ids,
 226                                          GFP_ATOMIC);
 227         if (hv_context.hv_numa_map == NULL) {
 228                 pr_err("Unable to allocate NUMA map\n");
 229                 goto err;
 230         }
 231
 232         for_each_present_cpu(cpu) {
 233                 hv_context.event_dpc[cpu] = kmalloc(size, GFP_ATOMIC);
 234                 if (hv_context.event_dpc[cpu] == NULL) {
 235                         pr_err("Unable to allocate event dpc\n");
 236                         goto err;
 237                 }
 238                 tasklet_init(hv_context.event_dpc[cpu], vmbus_on_event, cpu);
 239
 240                 hv_context.msg_dpc[cpu] = kmalloc(size, GFP_ATOMIC);
 241                 if (hv_context.msg_dpc[cpu] == NULL) {
 242                         pr_err("Unable to allocate event dpc\n");
 243                         goto err;
 244                 }
 245                 tasklet_init(hv_context.msg_dpc[cpu], vmbus_on_msg_dpc, cpu);
 246
 247                 hv_context.clk_evt[cpu] = kzalloc(ced_size, GFP_ATOMIC);
 248                 if (hv_context.clk_evt[cpu] == NULL) {
 249                         pr_err("Unable to allocate clock event device\n");
 250                         goto err;
 251                 }
 252
 253                 hv_init_clockevent_device(hv_context.clk_evt[cpu], cpu);
 254
 255                 hv_context.synic_message_page[cpu] =
 256                         (void *)get_zeroed_page(GFP_ATOMIC);
 257
 258                 if (hv_context.synic_message_page[cpu] == NULL) {
 259                         pr_err("Unable to allocate SYNIC message page\n");
 260                         goto err;
 261                 }
 262
 263                 hv_context.synic_event_page[cpu] =
 264                         (void *)get_zeroed_page(GFP_ATOMIC);
 265
 266                 if (hv_context.synic_event_page[cpu] == NULL) {
 267                         pr_err("Unable to allocate SYNIC event page\n");
 268                         goto err;
 269                 }
 270
 271                 hv_context.post_msg_page[cpu] =
 272                         (void *)get_zeroed_page(GFP_ATOMIC);
 273
 274                 if (hv_context.post_msg_page[cpu] == NULL) {
 275                         pr_err("Unable to allocate post msg page\n");
 276                         goto err;
 277                 }
 278
 279                 INIT_LIST_HEAD(&hv_context.percpu_list[cpu]);
 280         }
 281
 282         return 0;
 283 err:
 284         return -ENOMEM;
 285 }
 286
 287 static void hv_synic_free_cpu(int cpu)
 288 {
 289         kfree(hv_context.event_dpc[cpu]);
 290         kfree(hv_context.msg_dpc[cpu]);
 291         kfree(hv_context.clk_evt[cpu]);
 292         if (hv_context.synic_event_page[cpu])
 293                 free_page((unsigned long)hv_context.synic_event_page[cpu]);
 294         if (hv_context.synic_message_page[cpu])
 295                 free_page((unsigned long)hv_context.synic_message_page[cpu]);
 296         if (hv_context.post_msg_page[cpu])
 297                 free_page((unsigned long)hv_context.post_msg_page[cpu]);
 298 }
 299
 300 void hv_synic_free(void)
 301 {
 302         int cpu;
 303
 304         kfree(hv_context.hv_numa_map);
 305         for_each_present_cpu(cpu)
 306                 hv_synic_free_cpu(cpu);
 307 }
 308
 309 /*
 310  * hv_synic_init - Initialize the Synthethic Interrupt Controller.
 311  *
 312  * If it is already initialized by another entity (ie x2v shim), we need to
 313  * retrieve the initialized message and event pages.  Otherwise, we create and
 314  * initialize the message and event pages.
 315  */
 316 int hv_synic_init(unsigned int cpu)
 317 {
 318         u64 version;
 319         union hv_synic_simp simp;
 320         union hv_synic_siefp siefp;
 321         union hv_synic_sint shared_sint;
 322         union hv_synic_scontrol sctrl;
 323         u64 vp_index;
 324
 325         /* Check the version */
 326         rdmsrl(HV_X64_MSR_SVERSION, version);
 327
 328         /* Setup the Synic's message page */
 329         rdmsrl(HV_X64_MSR_SIMP, simp.as_uint64);
 330         simp.simp_enabled = 1;
 331         simp.base_simp_gpa = virt_to_phys(hv_context.synic_message_page[cpu])
 332                 >> PAGE_SHIFT;
 333
 334         wrmsrl(HV_X64_MSR_SIMP, simp.as_uint64);
 335
 336         /* Setup the Synic's event page */
 337         rdmsrl(HV_X64_MSR_SIEFP, siefp.as_uint64);
 338         siefp.siefp_enabled = 1;
 339         siefp.base_siefp_gpa = virt_to_phys(hv_context.synic_event_page[cpu])
 340                 >> PAGE_SHIFT;
 341
 342         wrmsrl(HV_X64_MSR_SIEFP, siefp.as_uint64);
 343
 344         /* Setup the shared SINT. */
 345         rdmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
 346
 347         shared_sint.as_uint64 = 0;
 348         shared_sint.vector = HYPERVISOR_CALLBACK_VECTOR;
 349         shared_sint.masked = false;
 350         shared_sint.auto_eoi = true;
 351
 352         wrmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
 353
 354         /* Enable the global synic bit */
 355         rdmsrl(HV_X64_MSR_SCONTROL, sctrl.as_uint64);
 356         sctrl.enable = 1;
 357
 358         wrmsrl(HV_X64_MSR_SCONTROL, sctrl.as_uint64);
 359
 360         hv_context.synic_initialized = true;
 361
 362         /*
 363          * Setup the mapping between Hyper-V's notion
 364          * of cpuid and Linux' notion of cpuid.
 365          * This array will be indexed using Linux cpuid.
 366          */
 367         rdmsrl(HV_X64_MSR_VP_INDEX, vp_index);
 368         hv_context.vp_index[cpu] = (u32)vp_index;
 369
 370         /*
 371          * Register the per-cpu clockevent source.
 372          */
 373         if (ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE)
 374                 clockevents_config_and_register(hv_context.clk_evt[cpu],
 375                                                 HV_TIMER_FREQUENCY,
 376                                                 HV_MIN_DELTA_TICKS,
 377                                                 HV_MAX_MAX_DELTA_TICKS);
 378         return 0;
 379 }
 380
 381 /*
 382  * hv_synic_clockevents_cleanup - Cleanup clockevent devices
 383  */
 384 void hv_synic_clockevents_cleanup(void)
 385 {
 386         int cpu;
 387
 388         if (!(ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE))
 389                 return;
 390
 391         for_each_present_cpu(cpu)
 392                 clockevents_unbind_device(hv_context.clk_evt[cpu], cpu);
 393 }
 394
 395 /*
 396  * hv_synic_cleanup - Cleanup routine for hv_synic_init().
 397  */
 398 int hv_synic_cleanup(unsigned int cpu)
 399 {
 400         union hv_synic_sint shared_sint;
 401         union hv_synic_simp simp;
 402         union hv_synic_siefp siefp;
 403         union hv_synic_scontrol sctrl;
 404         struct vmbus_channel *channel, *sc;
 405         bool channel_found = false;
 406         unsigned long flags;
 407
 408         if (!hv_context.synic_initialized)
 409                 return -EFAULT;
 410
 411         /*
 412          * Search for channels which are bound to the CPU we're about to
 413          * cleanup. In case we find one and vmbus is still connected we need to
 414          * fail, this will effectively prevent CPU offlining. There is no way
 415          * we can re-bind channels to different CPUs for now.
 416          */
 417         mutex_lock(&vmbus_connection.channel_mutex);
 418         list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
 419                 if (channel->target_cpu == cpu) {
 420                         channel_found = true;
 421                         break;
 422                 }
 423                 spin_lock_irqsave(&channel->lock, flags);
 424                 list_for_each_entry(sc, &channel->sc_list, sc_list) {
 425                         if (sc->target_cpu == cpu) {
 426                                 channel_found = true;
 427                                 break;
 428                         }
 429                 }
 430                 spin_unlock_irqrestore(&channel->lock, flags);
 431                 if (channel_found)
 432                         break;
 433         }
 434         mutex_unlock(&vmbus_connection.channel_mutex);
 435
 436         if (channel_found && vmbus_connection.conn_state == CONNECTED)
 437                 return -EBUSY;
 438
 439         /* Turn off clockevent device */
 440         if (ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE) {
 441                 clockevents_unbind_device(hv_context.clk_evt[cpu], cpu);
 442                 hv_ce_shutdown(hv_context.clk_evt[cpu]);
 443         }
 444
 445         rdmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
 446
 447         shared_sint.masked = 1;
 448
 449         /* Need to correctly cleanup in the case of SMP!!! */
 450         /* Disable the interrupt */
 451         wrmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
 452
 453         rdmsrl(HV_X64_MSR_SIMP, simp.as_uint64);
 454         simp.simp_enabled = 0;
 455         simp.base_simp_gpa = 0;
 456
 457         wrmsrl(HV_X64_MSR_SIMP, simp.as_uint64);
 458
 459         rdmsrl(HV_X64_MSR_SIEFP, siefp.as_uint64);
 460         siefp.siefp_enabled = 0;
 461         siefp.base_siefp_gpa = 0;
 462
 463         wrmsrl(HV_X64_MSR_SIEFP, siefp.as_uint64);
 464
 465         /* Disable the global synic bit */
 466         rdmsrl(HV_X64_MSR_SCONTROL, sctrl.as_uint64);
 467         sctrl.enable = 0;
 468         wrmsrl(HV_X64_MSR_SCONTROL, sctrl.as_uint64);
 469
 470         return 0;
 471 }