drivers/hv/hv.c

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  * Copyright (c) 2009, Microsoft Corporation.
   4  *
   5  * Authors:
   6  *   Haiyang Zhang <haiyangz@microsoft.com>
   7  *   Hank Janssen  <hjanssen@microsoft.com>
   8  */
   9 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  10
  11 #include <linux/kernel.h>
  12 #include <linux/mm.h>
  13 #include <linux/slab.h>
  14 #include <linux/vmalloc.h>
  15 #include <linux/hyperv.h>
  16 #include <linux/random.h>
  17 #include <linux/clockchips.h>
  18 #include <linux/delay.h>
  19 #include <linux/interrupt.h>
  20 #include <clocksource/hyperv_timer.h>
  21 #include <asm/mshyperv.h>
  22 #include "hyperv_vmbus.h"
  23
/*
 * The one and only Hyper-V context instance for this file.  Its per-CPU
 * cpu_context member is allocated by hv_init().
 */
struct hv_context hv_context;
  26
  27 /*
  28  * hv_init - Main initialization routine.
  29  *
  30  * This routine must be called before any other routines in here are called
  31  */
  32 int hv_init(void)
  33 {
  34         hv_context.cpu_context = alloc_percpu(struct hv_per_cpu_context);
  35         if (!hv_context.cpu_context)
  36                 return -ENOMEM;
  37         return 0;
  38 }
  39
  40 /*
  41  * Functions for allocating and freeing memory with size and
  42  * alignment HV_HYP_PAGE_SIZE. These functions are needed because
  43  * the guest page size may not be the same as the Hyper-V page
  44  * size. We depend upon kmalloc() aligning power-of-two size
  45  * allocations to the allocation size boundary, so that the
  46  * allocated memory appears to Hyper-V as a page of the size
  47  * it expects.
  48  */
  49
  50 void *hv_alloc_hyperv_page(void)
  51 {
  52         BUILD_BUG_ON(PAGE_SIZE <  HV_HYP_PAGE_SIZE);
  53
  54         if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
  55                 return (void *)__get_free_page(GFP_KERNEL);
  56         else
  57                 return kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
  58 }
  59
  60 void *hv_alloc_hyperv_zeroed_page(void)
  61 {
  62         if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
  63                 return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
  64         else
  65                 return kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
  66 }
  67
  68 void hv_free_hyperv_page(unsigned long addr)
  69 {
  70         if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
  71                 free_page(addr);
  72         else
  73                 kfree((void *)addr);
  74 }
  75
  76 /*
  77  * hv_post_message - Post a message using the hypervisor message IPC.
  78  *
  79  * This involves a hypercall.
  80  */
  81 int hv_post_message(union hv_connection_id connection_id,
  82                   enum hv_message_type message_type,
  83                   void *payload, size_t payload_size)
  84 {
  85         struct hv_input_post_message *aligned_msg;
  86         struct hv_per_cpu_context *hv_cpu;
  87         u64 status;
  88
  89         if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT)
  90                 return -EMSGSIZE;
  91
  92         hv_cpu = get_cpu_ptr(hv_context.cpu_context);
  93         aligned_msg = hv_cpu->post_msg_page;
  94         aligned_msg->connectionid = connection_id;
  95         aligned_msg->reserved = 0;
  96         aligned_msg->message_type = message_type;
  97         aligned_msg->payload_size = payload_size;
  98         memcpy((void *)aligned_msg->payload, payload, payload_size);
  99
 100         status = hv_do_hypercall(HVCALL_POST_MESSAGE, aligned_msg, NULL);
 101
 102         /* Preemption must remain disabled until after the hypercall
 103          * so some other thread can't get scheduled onto this cpu and
 104          * corrupt the per-cpu post_msg_page
 105          */
 106         put_cpu_ptr(hv_cpu);
 107
 108         return hv_result(status);
 109 }
 110
 111 int hv_synic_alloc(void)
 112 {
 113         int cpu;
 114         struct hv_per_cpu_context *hv_cpu;
 115
 116         /*
 117          * First, zero all per-cpu memory areas so hv_synic_free() can
 118          * detect what memory has been allocated and cleanup properly
 119          * after any failures.
 120          */
 121         for_each_present_cpu(cpu) {
 122                 hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu);
 123                 memset(hv_cpu, 0, sizeof(*hv_cpu));
 124         }
 125
 126         hv_context.hv_numa_map = kcalloc(nr_node_ids, sizeof(struct cpumask),
 127                                          GFP_KERNEL);
 128         if (hv_context.hv_numa_map == NULL) {
 129                 pr_err("Unable to allocate NUMA map\n");
 130                 goto err;
 131         }
 132
 133         for_each_present_cpu(cpu) {
 134                 hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu);
 135
 136                 tasklet_init(&hv_cpu->msg_dpc,
 137                              vmbus_on_msg_dpc, (unsigned long) hv_cpu);
 138
 139                 hv_cpu->synic_message_page =
 140                         (void *)get_zeroed_page(GFP_ATOMIC);
 141                 if (hv_cpu->synic_message_page == NULL) {
 142                         pr_err("Unable to allocate SYNIC message page\n");
 143                         goto err;
 144                 }
 145
 146                 hv_cpu->synic_event_page = (void *)get_zeroed_page(GFP_ATOMIC);
 147                 if (hv_cpu->synic_event_page == NULL) {
 148                         pr_err("Unable to allocate SYNIC event page\n");
 149                         goto err;
 150                 }
 151
 152                 hv_cpu->post_msg_page = (void *)get_zeroed_page(GFP_ATOMIC);
 153                 if (hv_cpu->post_msg_page == NULL) {
 154                         pr_err("Unable to allocate post msg page\n");
 155                         goto err;
 156                 }
 157         }
 158
 159         return 0;
 160 err:
 161         /*
 162          * Any memory allocations that succeeded will be freed when
 163          * the caller cleans up by calling hv_synic_free()
 164          */
 165         return -ENOMEM;
 166 }
 167
 168
 169 void hv_synic_free(void)
 170 {
 171         int cpu;
 172
 173         for_each_present_cpu(cpu) {
 174                 struct hv_per_cpu_context *hv_cpu
 175                         = per_cpu_ptr(hv_context.cpu_context, cpu);
 176
 177                 free_page((unsigned long)hv_cpu->synic_event_page);
 178                 free_page((unsigned long)hv_cpu->synic_message_page);
 179                 free_page((unsigned long)hv_cpu->post_msg_page);
 180         }
 181
 182         kfree(hv_context.hv_numa_map);
 183 }
 184
 185 /*
 186  * hv_synic_init - Initialize the Synthetic Interrupt Controller.
 187  *
 188  * If it is already initialized by another entity (ie x2v shim), we need to
 189  * retrieve the initialized message and event pages.  Otherwise, we create and
 190  * initialize the message and event pages.
 191  */
 192 void hv_synic_enable_regs(unsigned int cpu)
 193 {
 194         struct hv_per_cpu_context *hv_cpu
 195                 = per_cpu_ptr(hv_context.cpu_context, cpu);
 196         union hv_synic_simp simp;
 197         union hv_synic_siefp siefp;
 198         union hv_synic_sint shared_sint;
 199         union hv_synic_scontrol sctrl;
 200
 201         /* Setup the Synic's message page */
 202         simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
 203         simp.simp_enabled = 1;
 204         simp.base_simp_gpa = virt_to_phys(hv_cpu->synic_message_page)
 205                 >> HV_HYP_PAGE_SHIFT;
 206
 207         hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);
 208
 209         /* Setup the Synic's event page */
 210         siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
 211         siefp.siefp_enabled = 1;
 212         siefp.base_siefp_gpa = virt_to_phys(hv_cpu->synic_event_page)
 213                 >> HV_HYP_PAGE_SHIFT;
 214
 215         hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);
 216
 217         /* Setup the shared SINT. */
 218         if (vmbus_irq != -1)
 219                 enable_percpu_irq(vmbus_irq, 0);
 220         shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
 221                                         VMBUS_MESSAGE_SINT);
 222
 223         shared_sint.vector = vmbus_interrupt;
 224         shared_sint.masked = false;
 225
 226         /*
 227          * On architectures where Hyper-V doesn't support AEOI (e.g., ARM64),
 228          * it doesn't provide a recommendation flag and AEOI must be disabled.
 229          */
 230 #ifdef HV_DEPRECATING_AEOI_RECOMMENDED
 231         shared_sint.auto_eoi =
 232                         !(ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED);
 233 #else
 234         shared_sint.auto_eoi = 0;
 235 #endif
 236         hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
 237                                 shared_sint.as_uint64);
 238
 239         /* Enable the global synic bit */
 240         sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
 241         sctrl.enable = 1;
 242
 243         hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
 244 }
 245
 246 int hv_synic_init(unsigned int cpu)
 247 {
 248         hv_synic_enable_regs(cpu);
 249
 250         hv_stimer_legacy_init(cpu, VMBUS_MESSAGE_SINT);
 251
 252         return 0;
 253 }
 254
 255 /*
 256  * hv_synic_cleanup - Cleanup routine for hv_synic_init().
 257  */
 258 void hv_synic_disable_regs(unsigned int cpu)
 259 {
 260         union hv_synic_sint shared_sint;
 261         union hv_synic_simp simp;
 262         union hv_synic_siefp siefp;
 263         union hv_synic_scontrol sctrl;
 264
 265         shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
 266                                         VMBUS_MESSAGE_SINT);
 267
 268         shared_sint.masked = 1;
 269
 270         /* Need to correctly cleanup in the case of SMP!!! */
 271         /* Disable the interrupt */
 272         hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
 273                                 shared_sint.as_uint64);
 274
 275         simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
 276         simp.simp_enabled = 0;
 277         simp.base_simp_gpa = 0;
 278
 279         hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);
 280
 281         siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
 282         siefp.siefp_enabled = 0;
 283         siefp.base_siefp_gpa = 0;
 284
 285         hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);
 286
 287         /* Disable the global synic bit */
 288         sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
 289         sctrl.enable = 0;
 290         hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
 291
 292         if (vmbus_irq != -1)
 293                 disable_percpu_irq(vmbus_irq);
 294 }
 295
 296 #define HV_MAX_TRIES 3
 297 /*
 298  * Scan the event flags page of 'this' CPU looking for any bit that is set.  If we find one
 299  * bit set, then wait for a few milliseconds.  Repeat these steps for a maximum of 3 times.
 300  * Return 'true', if there is still any set bit after this operation; 'false', otherwise.
 301  *
 302  * If a bit is set, that means there is a pending channel interrupt.  The expectation is
 303  * that the normal interrupt handling mechanism will find and process the channel interrupt
 304  * "very soon", and in the process clear the bit.
 305  */
 306 static bool hv_synic_event_pending(void)
 307 {
 308         struct hv_per_cpu_context *hv_cpu = this_cpu_ptr(hv_context.cpu_context);
 309         union hv_synic_event_flags *event =
 310                 (union hv_synic_event_flags *)hv_cpu->synic_event_page + VMBUS_MESSAGE_SINT;
 311         unsigned long *recv_int_page = event->flags; /* assumes VMBus version >= VERSION_WIN8 */
 312         bool pending;
 313         u32 relid;
 314         int tries = 0;
 315
 316 retry:
 317         pending = false;
 318         for_each_set_bit(relid, recv_int_page, HV_EVENT_FLAGS_COUNT) {
 319                 /* Special case - VMBus channel protocol messages */
 320                 if (relid == 0)
 321                         continue;
 322                 pending = true;
 323                 break;
 324         }
 325         if (pending && tries++ < HV_MAX_TRIES) {
 326                 usleep_range(10000, 20000);
 327                 goto retry;
 328         }
 329         return pending;
 330 }
 331
 332 int hv_synic_cleanup(unsigned int cpu)
 333 {
 334         struct vmbus_channel *channel, *sc;
 335         bool channel_found = false;
 336
 337         if (vmbus_connection.conn_state != CONNECTED)
 338                 goto always_cleanup;
 339
 340         /*
 341          * Hyper-V does not provide a way to change the connect CPU once
 342          * it is set; we must prevent the connect CPU from going offline
 343          * while the VM is running normally. But in the panic or kexec()
 344          * path where the vmbus is already disconnected, the CPU must be
 345          * allowed to shut down.
 346          */
 347         if (cpu == VMBUS_CONNECT_CPU)
 348                 return -EBUSY;
 349
 350         /*
 351          * Search for channels which are bound to the CPU we're about to
 352          * cleanup.  In case we find one and vmbus is still connected, we
 353          * fail; this will effectively prevent CPU offlining.
 354          *
 355          * TODO: Re-bind the channels to different CPUs.
 356          */
 357         mutex_lock(&vmbus_connection.channel_mutex);
 358         list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
 359                 if (channel->target_cpu == cpu) {
 360                         channel_found = true;
 361                         break;
 362                 }
 363                 list_for_each_entry(sc, &channel->sc_list, sc_list) {
 364                         if (sc->target_cpu == cpu) {
 365                                 channel_found = true;
 366                                 break;
 367                         }
 368                 }
 369                 if (channel_found)
 370                         break;
 371         }
 372         mutex_unlock(&vmbus_connection.channel_mutex);
 373
 374         if (channel_found)
 375                 return -EBUSY;
 376
 377         /*
 378          * channel_found == false means that any channels that were previously
 379          * assigned to the CPU have been reassigned elsewhere with a call of
 380          * vmbus_send_modifychannel().  Scan the event flags page looking for
 381          * bits that are set and waiting with a timeout for vmbus_chan_sched()
 382          * to process such bits.  If bits are still set after this operation
 383          * and VMBus is connected, fail the CPU offlining operation.
 384          */
 385         if (vmbus_proto_version >= VERSION_WIN10_V4_1 && hv_synic_event_pending())
 386                 return -EBUSY;
 387
 388 always_cleanup:
 389         hv_stimer_legacy_cleanup(cpu);
 390
 391         hv_synic_disable_regs(cpu);
 392
 393         return 0;
 394 }