drivers/hv/channel_mgmt.c

   1 /*
   2  * Copyright (c) 2009, Microsoft Corporation.
   3  *
   4  * This program is free software; you can redistribute it and/or modify it
   5  * under the terms and conditions of the GNU General Public License,
   6  * version 2, as published by the Free Software Foundation.
   7  *
   8  * This program is distributed in the hope it will be useful, but WITHOUT
   9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  11  * more details.
  12  *
  13  * You should have received a copy of the GNU General Public License along with
  14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
  15  * Place - Suite 330, Boston, MA 02111-1307 USA.
  16  *
  17  * Authors:
  18  *   Haiyang Zhang <haiyangz@microsoft.com>
  19  *   Hank Janssen  <hjanssen@microsoft.com>
  20  */
  21 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  22
  23 #include <linux/kernel.h>
  24 #include <linux/interrupt.h>
  25 #include <linux/sched.h>
  26 #include <linux/wait.h>
  27 #include <linux/mm.h>
  28 #include <linux/slab.h>
  29 #include <linux/list.h>
  30 #include <linux/module.h>
  31 #include <linux/completion.h>
  32 #include <linux/delay.h>
  33 #include <linux/hyperv.h>
  34 #include <asm/mshyperv.h>
  35
  36 #include "hyperv_vmbus.h"
  37
  38 static void init_vp_index(struct vmbus_channel *channel, u16 dev_type);
  39
  40 static const struct vmbus_device vmbus_devs[] = {
  41         /* IDE */
  42         { .dev_type = HV_IDE,
  43           HV_IDE_GUID,
  44           .perf_device = true,
  45         },
  46
  47         /* SCSI */
  48         { .dev_type = HV_SCSI,
  49           HV_SCSI_GUID,
  50           .perf_device = true,
  51         },
  52
  53         /* Fibre Channel */
  54         { .dev_type = HV_FC,
  55           HV_SYNTHFC_GUID,
  56           .perf_device = true,
  57         },
  58
  59         /* Synthetic NIC */
  60         { .dev_type = HV_NIC,
  61           HV_NIC_GUID,
  62           .perf_device = true,
  63         },
  64
  65         /* Network Direct */
  66         { .dev_type = HV_ND,
  67           HV_ND_GUID,
  68           .perf_device = true,
  69         },
  70
  71         /* PCIE */
  72         { .dev_type = HV_PCIE,
  73           HV_PCIE_GUID,
  74           .perf_device = true,
  75         },
  76
  77         /* Synthetic Frame Buffer */
  78         { .dev_type = HV_FB,
  79           HV_SYNTHVID_GUID,
  80           .perf_device = false,
  81         },
  82
  83         /* Synthetic Keyboard */
  84         { .dev_type = HV_KBD,
  85           HV_KBD_GUID,
  86           .perf_device = false,
  87         },
  88
  89         /* Synthetic MOUSE */
  90         { .dev_type = HV_MOUSE,
  91           HV_MOUSE_GUID,
  92           .perf_device = false,
  93         },
  94
  95         /* KVP */
  96         { .dev_type = HV_KVP,
  97           HV_KVP_GUID,
  98           .perf_device = false,
  99         },
 100
 101         /* Time Synch */
 102         { .dev_type = HV_TS,
 103           HV_TS_GUID,
 104           .perf_device = false,
 105         },
 106
 107         /* Heartbeat */
 108         { .dev_type = HV_HB,
 109           HV_HEART_BEAT_GUID,
 110           .perf_device = false,
 111         },
 112
 113         /* Shutdown */
 114         { .dev_type = HV_SHUTDOWN,
 115           HV_SHUTDOWN_GUID,
 116           .perf_device = false,
 117         },
 118
 119         /* File copy */
 120         { .dev_type = HV_FCOPY,
 121           HV_FCOPY_GUID,
 122           .perf_device = false,
 123         },
 124
 125         /* Backup */
 126         { .dev_type = HV_BACKUP,
 127           HV_VSS_GUID,
 128           .perf_device = false,
 129         },
 130
 131         /* Dynamic Memory */
 132         { .dev_type = HV_DM,
 133           HV_DM_GUID,
 134           .perf_device = false,
 135         },
 136
 137         /* Unknown GUID */
 138         { .dev_type = HV_UNKNOWN,
 139           .perf_device = false,
 140         },
 141 };
 142
 143 static const struct {
 144         uuid_le guid;
 145 } vmbus_unsupported_devs[] = {
 146         { HV_AVMA1_GUID },
 147         { HV_AVMA2_GUID },
 148         { HV_RDV_GUID   },
 149 };
 150
 151 /*
 152  * The rescinded channel may be blocked waiting for a response from the host;
 153  * take care of that.
 154  */
 155 static void vmbus_rescind_cleanup(struct vmbus_channel *channel)
 156 {
 157         struct vmbus_channel_msginfo *msginfo;
 158         unsigned long flags;
 159
 160
 161         spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
 162
 163         list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
 164                                 msglistentry) {
 165
 166                 if (msginfo->waiting_channel == channel) {
 167                         complete(&msginfo->waitevent);
 168                         break;
 169                 }
 170         }
 171         spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
 172 }
 173
 174 static bool is_unsupported_vmbus_devs(const uuid_le *guid)
 175 {
 176         int i;
 177
 178         for (i = 0; i < ARRAY_SIZE(vmbus_unsupported_devs); i++)
 179                 if (!uuid_le_cmp(*guid, vmbus_unsupported_devs[i].guid))
 180                         return true;
 181         return false;
 182 }
 183
 184 static u16 hv_get_dev_type(const struct vmbus_channel *channel)
 185 {
 186         const uuid_le *guid = &channel->offermsg.offer.if_type;
 187         u16 i;
 188
 189         if (is_hvsock_channel(channel) || is_unsupported_vmbus_devs(guid))
 190                 return HV_UNKNOWN;
 191
 192         for (i = HV_IDE; i < HV_UNKNOWN; i++) {
 193                 if (!uuid_le_cmp(*guid, vmbus_devs[i].guid))
 194                         return i;
 195         }
 196         pr_info("Unknown GUID: %pUl\n", guid);
 197         return i;
 198 }
 199
 200 /**
 201  * vmbus_prep_negotiate_resp() - Create default response for Hyper-V Negotiate message
 202  * @icmsghdrp: Pointer to msg header structure
 203  * @icmsg_negotiate: Pointer to negotiate message structure
 204  * @buf: Raw buffer channel data
 205  *
 206  * @icmsghdrp is of type &struct icmsg_hdr.
 207  * @negop is of type &struct icmsg_negotiate.
 208  * Set up and fill in default negotiate response message.
 209  *
 210  * The fw_version specifies the  framework version that
 211  * we can support and srv_version specifies the service
 212  * version we can support.
 213  *
 214  * Mainly used by Hyper-V drivers.
 215  */
 216 bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp,
 217                                 struct icmsg_negotiate *negop, u8 *buf,
 218                                 int fw_version, int srv_version)
 219 {
 220         int icframe_major, icframe_minor;
 221         int icmsg_major, icmsg_minor;
 222         int fw_major, fw_minor;
 223         int srv_major, srv_minor;
 224         int i;
 225         bool found_match = false;
 226
 227         icmsghdrp->icmsgsize = 0x10;
 228         fw_major = (fw_version >> 16);
 229         fw_minor = (fw_version & 0xFFFF);
 230
 231         srv_major = (srv_version >> 16);
 232         srv_minor = (srv_version & 0xFFFF);
 233
 234         negop = (struct icmsg_negotiate *)&buf[
 235                 sizeof(struct vmbuspipe_hdr) +
 236                 sizeof(struct icmsg_hdr)];
 237
 238         icframe_major = negop->icframe_vercnt;
 239         icframe_minor = 0;
 240
 241         icmsg_major = negop->icmsg_vercnt;
 242         icmsg_minor = 0;
 243
 244         /*
 245          * Select the framework version number we will
 246          * support.
 247          */
 248
 249         for (i = 0; i < negop->icframe_vercnt; i++) {
 250                 if ((negop->icversion_data[i].major == fw_major) &&
 251                    (negop->icversion_data[i].minor == fw_minor)) {
 252                         icframe_major = negop->icversion_data[i].major;
 253                         icframe_minor = negop->icversion_data[i].minor;
 254                         found_match = true;
 255                 }
 256         }
 257
 258         if (!found_match)
 259                 goto fw_error;
 260
 261         found_match = false;
 262
 263         for (i = negop->icframe_vercnt;
 264                  (i < negop->icframe_vercnt + negop->icmsg_vercnt); i++) {
 265                 if ((negop->icversion_data[i].major == srv_major) &&
 266                    (negop->icversion_data[i].minor == srv_minor)) {
 267                         icmsg_major = negop->icversion_data[i].major;
 268                         icmsg_minor = negop->icversion_data[i].minor;
 269                         found_match = true;
 270                 }
 271         }
 272
 273         /*
 274          * Respond with the framework and service
 275          * version numbers we can support.
 276          */
 277
 278 fw_error:
 279         if (!found_match) {
 280                 negop->icframe_vercnt = 0;
 281                 negop->icmsg_vercnt = 0;
 282         } else {
 283                 negop->icframe_vercnt = 1;
 284                 negop->icmsg_vercnt = 1;
 285         }
 286
 287         negop->icversion_data[0].major = icframe_major;
 288         negop->icversion_data[0].minor = icframe_minor;
 289         negop->icversion_data[1].major = icmsg_major;
 290         negop->icversion_data[1].minor = icmsg_minor;
 291         return found_match;
 292 }
 293
 294 EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp);
 295
 296 /*
 297  * alloc_channel - Allocate and initialize a vmbus channel object
 298  */
 299 static struct vmbus_channel *alloc_channel(void)
 300 {
 301         struct vmbus_channel *channel;
 302
 303         channel = kzalloc(sizeof(*channel), GFP_ATOMIC);
 304         if (!channel)
 305                 return NULL;
 306
 307         channel->acquire_ring_lock = true;
 308         spin_lock_init(&channel->inbound_lock);
 309         spin_lock_init(&channel->lock);
 310
 311         INIT_LIST_HEAD(&channel->sc_list);
 312         INIT_LIST_HEAD(&channel->percpu_list);
 313
 314         return channel;
 315 }
 316
 317 /*
 318  * free_channel - Release the resources used by the vmbus channel object
 319  */
 320 static void free_channel(struct vmbus_channel *channel)
 321 {
 322         kfree(channel);
 323 }
 324
 325 static void percpu_channel_enq(void *arg)
 326 {
 327         struct vmbus_channel *channel = arg;
 328         int cpu = smp_processor_id();
 329
 330         list_add_tail(&channel->percpu_list, &hv_context.percpu_list[cpu]);
 331 }
 332
 333 static void percpu_channel_deq(void *arg)
 334 {
 335         struct vmbus_channel *channel = arg;
 336
 337         list_del(&channel->percpu_list);
 338 }
 339
 340
 341 static void vmbus_release_relid(u32 relid)
 342 {
 343         struct vmbus_channel_relid_released msg;
 344
 345         memset(&msg, 0, sizeof(struct vmbus_channel_relid_released));
 346         msg.child_relid = relid;
 347         msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
 348         vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released),
 349                        true);
 350 }
 351
 352 void hv_event_tasklet_disable(struct vmbus_channel *channel)
 353 {
 354         struct tasklet_struct *tasklet;
 355         tasklet = hv_context.event_dpc[channel->target_cpu];
 356         tasklet_disable(tasklet);
 357 }
 358
 359 void hv_event_tasklet_enable(struct vmbus_channel *channel)
 360 {
 361         struct tasklet_struct *tasklet;
 362         tasklet = hv_context.event_dpc[channel->target_cpu];
 363         tasklet_enable(tasklet);
 364
 365         /* In case there is any pending event */
 366         tasklet_schedule(tasklet);
 367 }
 368
 369 void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
 370 {
 371         unsigned long flags;
 372         struct vmbus_channel *primary_channel;
 373
 374         BUG_ON(!channel->rescind);
 375         BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));
 376
 377         hv_event_tasklet_disable(channel);
 378         if (channel->target_cpu != get_cpu()) {
 379                 put_cpu();
 380                 smp_call_function_single(channel->target_cpu,
 381                                          percpu_channel_deq, channel, true);
 382         } else {
 383                 percpu_channel_deq(channel);
 384                 put_cpu();
 385         }
 386         hv_event_tasklet_enable(channel);
 387
 388         if (channel->primary_channel == NULL) {
 389                 list_del(&channel->listentry);
 390
 391                 primary_channel = channel;
 392         } else {
 393                 primary_channel = channel->primary_channel;
 394                 spin_lock_irqsave(&primary_channel->lock, flags);
 395                 list_del(&channel->sc_list);
 396                 primary_channel->num_sc--;
 397                 spin_unlock_irqrestore(&primary_channel->lock, flags);
 398         }
 399
 400         /*
 401          * We need to free the bit for init_vp_index() to work in the case
 402          * of sub-channel, when we reload drivers like hv_netvsc.
 403          */
 404         if (channel->affinity_policy == HV_LOCALIZED)
 405                 cpumask_clear_cpu(channel->target_cpu,
 406                                   &primary_channel->alloced_cpus_in_node);
 407
 408         vmbus_release_relid(relid);
 409
 410         free_channel(channel);
 411 }
 412
 413 void vmbus_free_channels(void)
 414 {
 415         struct vmbus_channel *channel, *tmp;
 416
 417         mutex_lock(&vmbus_connection.channel_mutex);
 418         list_for_each_entry_safe(channel, tmp, &vmbus_connection.chn_list,
 419                 listentry) {
 420                 /* hv_process_channel_removal() needs this */
 421                 channel->rescind = true;
 422
 423                 vmbus_device_unregister(channel->device_obj);
 424         }
 425         mutex_unlock(&vmbus_connection.channel_mutex);
 426 }
 427
 428 /*
 429  * vmbus_process_offer - Process the offer by creating a channel/device
 430  * associated with this offer
 431  */
 432 static void vmbus_process_offer(struct vmbus_channel *newchannel)
 433 {
 434         struct vmbus_channel *channel;
 435         bool fnew = true;
 436         unsigned long flags;
 437         u16 dev_type;
 438         int ret;
 439
 440         /* Make sure this is a new offer */
 441         mutex_lock(&vmbus_connection.channel_mutex);
 442
 443         list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
 444                 if (!uuid_le_cmp(channel->offermsg.offer.if_type,
 445                         newchannel->offermsg.offer.if_type) &&
 446                         !uuid_le_cmp(channel->offermsg.offer.if_instance,
 447                                 newchannel->offermsg.offer.if_instance)) {
 448                         fnew = false;
 449                         break;
 450                 }
 451         }
 452
 453         if (fnew)
 454                 list_add_tail(&newchannel->listentry,
 455                               &vmbus_connection.chn_list);
 456
 457         mutex_unlock(&vmbus_connection.channel_mutex);
 458
 459         if (!fnew) {
 460                 /*
 461                  * Check to see if this is a sub-channel.
 462                  */
 463                 if (newchannel->offermsg.offer.sub_channel_index != 0) {
 464                         /*
 465                          * Process the sub-channel.
 466                          */
 467                         newchannel->primary_channel = channel;
 468                         spin_lock_irqsave(&channel->lock, flags);
 469                         list_add_tail(&newchannel->sc_list, &channel->sc_list);
 470                         channel->num_sc++;
 471                         spin_unlock_irqrestore(&channel->lock, flags);
 472                 } else
 473                         goto err_free_chan;
 474         }
 475
 476         dev_type = hv_get_dev_type(newchannel);
 477
 478         init_vp_index(newchannel, dev_type);
 479
 480         hv_event_tasklet_disable(newchannel);
 481         if (newchannel->target_cpu != get_cpu()) {
 482                 put_cpu();
 483                 smp_call_function_single(newchannel->target_cpu,
 484                                          percpu_channel_enq,
 485                                          newchannel, true);
 486         } else {
 487                 percpu_channel_enq(newchannel);
 488                 put_cpu();
 489         }
 490         hv_event_tasklet_enable(newchannel);
 491
 492         /*
 493          * This state is used to indicate a successful open
 494          * so that when we do close the channel normally, we
 495          * can cleanup properly
 496          */
 497         newchannel->state = CHANNEL_OPEN_STATE;
 498
 499         if (!fnew) {
 500                 if (channel->sc_creation_callback != NULL)
 501                         channel->sc_creation_callback(newchannel);
 502                 return;
 503         }
 504
 505         /*
 506          * Start the process of binding this offer to the driver
 507          * We need to set the DeviceObject field before calling
 508          * vmbus_child_dev_add()
 509          */
 510         newchannel->device_obj = vmbus_device_create(
 511                 &newchannel->offermsg.offer.if_type,
 512                 &newchannel->offermsg.offer.if_instance,
 513                 newchannel);
 514         if (!newchannel->device_obj)
 515                 goto err_deq_chan;
 516
 517         newchannel->device_obj->device_id = dev_type;
 518         /*
 519          * Add the new device to the bus. This will kick off device-driver
 520          * binding which eventually invokes the device driver's AddDevice()
 521          * method.
 522          */
 523         mutex_lock(&vmbus_connection.channel_mutex);
 524         ret = vmbus_device_register(newchannel->device_obj);
 525         mutex_unlock(&vmbus_connection.channel_mutex);
 526
 527         if (ret != 0) {
 528                 pr_err("unable to add child device object (relid %d)\n",
 529                         newchannel->offermsg.child_relid);
 530                 kfree(newchannel->device_obj);
 531                 goto err_deq_chan;
 532         }
 533         return;
 534
 535 err_deq_chan:
 536         mutex_lock(&vmbus_connection.channel_mutex);
 537         list_del(&newchannel->listentry);
 538         mutex_unlock(&vmbus_connection.channel_mutex);
 539
 540         hv_event_tasklet_disable(newchannel);
 541         if (newchannel->target_cpu != get_cpu()) {
 542                 put_cpu();
 543                 smp_call_function_single(newchannel->target_cpu,
 544                                          percpu_channel_deq, newchannel, true);
 545         } else {
 546                 percpu_channel_deq(newchannel);
 547                 put_cpu();
 548         }
 549         hv_event_tasklet_enable(newchannel);
 550
 551         vmbus_release_relid(newchannel->offermsg.child_relid);
 552
 553 err_free_chan:
 554         free_channel(newchannel);
 555 }
 556
 557 /*
 558  * We use this state to statically distribute the channel interrupt load.
 559  */
 560 static int next_numa_node_id;
 561
 562 /*
 563  * Starting with Win8, we can statically distribute the incoming
 564  * channel interrupt load by binding a channel to VCPU.
 565  * We do this in a hierarchical fashion:
 566  * First distribute the primary channels across available NUMA nodes
 567  * and then distribute the subchannels amongst the CPUs in the NUMA
 568  * node assigned to the primary channel.
 569  *
 570  * For pre-win8 hosts or non-performance critical channels we assign the
 571  * first CPU in the first NUMA node.
 572  */
 573 static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
 574 {
 575         u32 cur_cpu;
 576         bool perf_chn = vmbus_devs[dev_type].perf_device;
 577         struct vmbus_channel *primary = channel->primary_channel;
 578         int next_node;
 579         struct cpumask available_mask;
 580         struct cpumask *alloced_mask;
 581
 582         if ((vmbus_proto_version == VERSION_WS2008) ||
 583             (vmbus_proto_version == VERSION_WIN7) || (!perf_chn)) {
 584                 /*
 585                  * Prior to win8, all channel interrupts are
 586                  * delivered on cpu 0.
 587                  * Also if the channel is not a performance critical
 588                  * channel, bind it to cpu 0.
 589                  */
 590                 channel->numa_node = 0;
 591                 channel->target_cpu = 0;
 592                 channel->target_vp = hv_context.vp_index[0];
 593                 return;
 594         }
 595
 596         /*
 597          * Based on the channel affinity policy, we will assign the NUMA
 598          * nodes.
 599          */
 600
 601         if ((channel->affinity_policy == HV_BALANCED) || (!primary)) {
 602                 while (true) {
 603                         next_node = next_numa_node_id++;
 604                         if (next_node == nr_node_ids) {
 605                                 next_node = next_numa_node_id = 0;
 606                                 continue;
 607                         }
 608                         if (cpumask_empty(cpumask_of_node(next_node)))
 609                                 continue;
 610                         break;
 611                 }
 612                 channel->numa_node = next_node;
 613                 primary = channel;
 614         }
 615         alloced_mask = &hv_context.hv_numa_map[primary->numa_node];
 616
 617         if (cpumask_weight(alloced_mask) ==
 618             cpumask_weight(cpumask_of_node(primary->numa_node))) {
 619                 /*
 620                  * We have cycled through all the CPUs in the node;
 621                  * reset the alloced map.
 622                  */
 623                 cpumask_clear(alloced_mask);
 624         }
 625
 626         cpumask_xor(&available_mask, alloced_mask,
 627                     cpumask_of_node(primary->numa_node));
 628
 629         cur_cpu = -1;
 630
 631         if (primary->affinity_policy == HV_LOCALIZED) {
 632                 /*
 633                  * Normally Hyper-V host doesn't create more subchannels
 634                  * than there are VCPUs on the node but it is possible when not
 635                  * all present VCPUs on the node are initialized by guest.
 636                  * Clear the alloced_cpus_in_node to start over.
 637                  */
 638                 if (cpumask_equal(&primary->alloced_cpus_in_node,
 639                                   cpumask_of_node(primary->numa_node)))
 640                         cpumask_clear(&primary->alloced_cpus_in_node);
 641         }
 642
 643         while (true) {
 644                 cur_cpu = cpumask_next(cur_cpu, &available_mask);
 645                 if (cur_cpu >= nr_cpu_ids) {
 646                         cur_cpu = -1;
 647                         cpumask_copy(&available_mask,
 648                                      cpumask_of_node(primary->numa_node));
 649                         continue;
 650                 }
 651
 652                 if (primary->affinity_policy == HV_LOCALIZED) {
 653                         /*
 654                          * NOTE: in the case of sub-channel, we clear the
 655                          * sub-channel related bit(s) in
 656                          * primary->alloced_cpus_in_node in
 657                          * hv_process_channel_removal(), so when we
 658                          * reload drivers like hv_netvsc in SMP guest, here
 659                          * we're able to re-allocate
 660                          * bit from primary->alloced_cpus_in_node.
 661                          */
 662                         if (!cpumask_test_cpu(cur_cpu,
 663                                               &primary->alloced_cpus_in_node)) {
 664                                 cpumask_set_cpu(cur_cpu,
 665                                                 &primary->alloced_cpus_in_node);
 666                                 cpumask_set_cpu(cur_cpu, alloced_mask);
 667                                 break;
 668                         }
 669                 } else {
 670                         cpumask_set_cpu(cur_cpu, alloced_mask);
 671                         break;
 672                 }
 673         }
 674
 675         channel->target_cpu = cur_cpu;
 676         channel->target_vp = hv_context.vp_index[cur_cpu];
 677 }
 678
 679 static void vmbus_wait_for_unload(void)
 680 {
 681         int cpu;
 682         void *page_addr;
 683         struct hv_message *msg;
 684         struct vmbus_channel_message_header *hdr;
 685         u32 message_type;
 686
 687         /*
 688          * CHANNELMSG_UNLOAD_RESPONSE is always delivered to the CPU which was
 689          * used for initial contact or to CPU0 depending on host version. When
 690          * we're crashing on a different CPU let's hope that IRQ handler on
 691          * the cpu which receives CHANNELMSG_UNLOAD_RESPONSE is still
 692          * functional and vmbus_unload_response() will complete
 693          * vmbus_connection.unload_event. If not, the last thing we can do is
 694          * read message pages for all CPUs directly.
 695          */
 696         while (1) {
 697                 if (completion_done(&vmbus_connection.unload_event))
 698                         break;
 699
 700                 for_each_online_cpu(cpu) {
 701                         page_addr = hv_context.synic_message_page[cpu];
 702                         msg = (struct hv_message *)page_addr +
 703                                 VMBUS_MESSAGE_SINT;
 704
 705                         message_type = READ_ONCE(msg->header.message_type);
 706                         if (message_type == HVMSG_NONE)
 707                                 continue;
 708
 709                         hdr = (struct vmbus_channel_message_header *)
 710                                 msg->u.payload;
 711
 712                         if (hdr->msgtype == CHANNELMSG_UNLOAD_RESPONSE)
 713                                 complete(&vmbus_connection.unload_event);
 714
 715                         vmbus_signal_eom(msg, message_type);
 716                 }
 717
 718                 mdelay(10);
 719         }
 720
 721         /*
 722          * We're crashing and already got the UNLOAD_RESPONSE, cleanup all
 723          * maybe-pending messages on all CPUs to be able to receive new
 724          * messages after we reconnect.
 725          */
 726         for_each_online_cpu(cpu) {
 727                 page_addr = hv_context.synic_message_page[cpu];
 728                 msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
 729                 msg->header.message_type = HVMSG_NONE;
 730         }
 731 }
 732
 733 /*
 734  * vmbus_unload_response - Handler for the unload response.
 735  */
 736 static void vmbus_unload_response(struct vmbus_channel_message_header *hdr)
 737 {
 738         /*
 739          * This is a global event; just wakeup the waiting thread.
 740          * Once we successfully unload, we can cleanup the monitor state.
 741          */
 742         complete(&vmbus_connection.unload_event);
 743 }
 744
 745 void vmbus_initiate_unload(bool crash)
 746 {
 747         struct vmbus_channel_message_header hdr;
 748
 749         /* Pre-Win2012R2 hosts don't support reconnect */
 750         if (vmbus_proto_version < VERSION_WIN8_1)
 751                 return;
 752
 753         init_completion(&vmbus_connection.unload_event);
 754         memset(&hdr, 0, sizeof(struct vmbus_channel_message_header));
 755         hdr.msgtype = CHANNELMSG_UNLOAD;
 756         vmbus_post_msg(&hdr, sizeof(struct vmbus_channel_message_header),
 757                        !crash);
 758
 759         /*
 760          * vmbus_initiate_unload() is also called on crash and the crash can be
 761          * happening in an interrupt context, where scheduling is impossible.
 762          */
 763         if (!crash)
 764                 wait_for_completion(&vmbus_connection.unload_event);
 765         else
 766                 vmbus_wait_for_unload();
 767 }
 768
 769 /*
 770  * vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
 771  *
 772  */
 773 static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
 774 {
 775         struct vmbus_channel_offer_channel *offer;
 776         struct vmbus_channel *newchannel;
 777
 778         offer = (struct vmbus_channel_offer_channel *)hdr;
 779
 780         /* Allocate the channel object and save this offer. */
 781         newchannel = alloc_channel();
 782         if (!newchannel) {
 783                 pr_err("Unable to allocate channel object\n");
 784                 return;
 785         }
 786
 787         /*
 788          * By default we setup state to enable batched
 789          * reading. A specific service can choose to
 790          * disable this prior to opening the channel.
 791          */
 792         newchannel->batched_reading = true;
 793
 794         /*
 795          * Setup state for signalling the host.
 796          */
 797         newchannel->sig_event = (struct hv_input_signal_event *)
 798                                 (ALIGN((unsigned long)
 799                                 &newchannel->sig_buf,
 800                                 HV_HYPERCALL_PARAM_ALIGN));
 801
 802         newchannel->sig_event->connectionid.asu32 = 0;
 803         newchannel->sig_event->connectionid.u.id = VMBUS_EVENT_CONNECTION_ID;
 804         newchannel->sig_event->flag_number = 0;
 805         newchannel->sig_event->rsvdz = 0;
 806
 807         if (vmbus_proto_version != VERSION_WS2008) {
 808                 newchannel->is_dedicated_interrupt =
 809                                 (offer->is_dedicated_interrupt != 0);
 810                 newchannel->sig_event->connectionid.u.id =
 811                                 offer->connection_id;
 812         }
 813
 814         memcpy(&newchannel->offermsg, offer,
 815                sizeof(struct vmbus_channel_offer_channel));
 816         newchannel->monitor_grp = (u8)offer->monitorid / 32;
 817         newchannel->monitor_bit = (u8)offer->monitorid % 32;
 818
 819         vmbus_process_offer(newchannel);
 820 }
 821
 822 /*
 823  * vmbus_onoffer_rescind - Rescind offer handler.
 824  *
 825  * We queue a work item to process this offer synchronously
 826  */
 827 static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
 828 {
 829         struct vmbus_channel_rescind_offer *rescind;
 830         struct vmbus_channel *channel;
 831         unsigned long flags;
 832         struct device *dev;
 833
 834         rescind = (struct vmbus_channel_rescind_offer *)hdr;
 835
 836         mutex_lock(&vmbus_connection.channel_mutex);
 837         channel = relid2channel(rescind->child_relid);
 838
 839         if (channel == NULL) {
 840                 /*
 841                  * This is very impossible, because in
 842                  * vmbus_process_offer(), we have already invoked
 843                  * vmbus_release_relid() on error.
 844                  */
 845                 goto out;
 846         }
 847
 848         spin_lock_irqsave(&channel->lock, flags);
 849         channel->rescind = true;
 850         spin_unlock_irqrestore(&channel->lock, flags);
 851
 852         vmbus_rescind_cleanup(channel);
 853
 854         if (channel->device_obj) {
 855                 if (channel->chn_rescind_callback) {
 856                         channel->chn_rescind_callback(channel);
 857                         goto out;
 858                 }
 859                 /*
 860                  * We will have to unregister this device from the
 861                  * driver core.
 862                  */
 863                 dev = get_device(&channel->device_obj->device);
 864                 if (dev) {
 865                         vmbus_device_unregister(channel->device_obj);
 866                         put_device(dev);
 867                 }
 868         } else {
 869                 hv_process_channel_removal(channel,
 870                         channel->offermsg.child_relid);
 871         }
 872
 873 out:
 874         mutex_unlock(&vmbus_connection.channel_mutex);
 875 }
 876
 877 void vmbus_hvsock_device_unregister(struct vmbus_channel *channel)
 878 {
 879         mutex_lock(&vmbus_connection.channel_mutex);
 880
 881         BUG_ON(!is_hvsock_channel(channel));
 882
 883         channel->rescind = true;
 884         vmbus_device_unregister(channel->device_obj);
 885
 886         mutex_unlock(&vmbus_connection.channel_mutex);
 887 }
 888 EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister);
 889
 890
 891 /*
 892  * vmbus_onoffers_delivered -
 893  * This is invoked when all offers have been delivered.
 894  *
 895  * Nothing to do here.
 896  */
 897 static void vmbus_onoffers_delivered(
 898                         struct vmbus_channel_message_header *hdr)
 899 {
 900 }
 901
 902 /*
 903  * vmbus_onopen_result - Open result handler.
 904  *
 905  * This is invoked when we received a response to our channel open request.
 906  * Find the matching request, copy the response and signal the requesting
 907  * thread.
 908  */
 909 static void vmbus_onopen_result(struct vmbus_channel_message_header *hdr)
 910 {
 911         struct vmbus_channel_open_result *result;
 912         struct vmbus_channel_msginfo *msginfo;
 913         struct vmbus_channel_message_header *requestheader;
 914         struct vmbus_channel_open_channel *openmsg;
 915         unsigned long flags;
 916
 917         result = (struct vmbus_channel_open_result *)hdr;
 918
 919         /*
 920          * Find the open msg, copy the result and signal/unblock the wait event
 921          */
 922         spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
 923
 924         list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
 925                                 msglistentry) {
 926                 requestheader =
 927                         (struct vmbus_channel_message_header *)msginfo->msg;
 928
 929                 if (requestheader->msgtype == CHANNELMSG_OPENCHANNEL) {
 930                         openmsg =
 931                         (struct vmbus_channel_open_channel *)msginfo->msg;
 932                         if (openmsg->child_relid == result->child_relid &&
 933                             openmsg->openid == result->openid) {
 934                                 memcpy(&msginfo->response.open_result,
 935                                        result,
 936                                        sizeof(
 937                                         struct vmbus_channel_open_result));
 938                                 complete(&msginfo->waitevent);
 939                                 break;
 940                         }
 941                 }
 942         }
 943         spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
 944 }
 945
 946 /*
 947  * vmbus_ongpadl_created - GPADL created handler.
 948  *
 949  * This is invoked when we received a response to our gpadl create request.
 950  * Find the matching request, copy the response and signal the requesting
 951  * thread.
 952  */
 953 static void vmbus_ongpadl_created(struct vmbus_channel_message_header *hdr)
 954 {
 955         struct vmbus_channel_gpadl_created *gpadlcreated;
 956         struct vmbus_channel_msginfo *msginfo;
 957         struct vmbus_channel_message_header *requestheader;
 958         struct vmbus_channel_gpadl_header *gpadlheader;
 959         unsigned long flags;
 960
 961         gpadlcreated = (struct vmbus_channel_gpadl_created *)hdr;
 962
 963         /*
 964          * Find the establish msg, copy the result and signal/unblock the wait
 965          * event
 966          */
 967         spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
 968
 969         list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
 970                                 msglistentry) {
 971                 requestheader =
 972                         (struct vmbus_channel_message_header *)msginfo->msg;
 973
 974                 if (requestheader->msgtype == CHANNELMSG_GPADL_HEADER) {
 975                         gpadlheader =
 976                         (struct vmbus_channel_gpadl_header *)requestheader;
 977
 978                         if ((gpadlcreated->child_relid ==
 979                              gpadlheader->child_relid) &&
 980                             (gpadlcreated->gpadl == gpadlheader->gpadl)) {
 981                                 memcpy(&msginfo->response.gpadl_created,
 982                                        gpadlcreated,
 983                                        sizeof(
 984                                         struct vmbus_channel_gpadl_created));
 985                                 complete(&msginfo->waitevent);
 986                                 break;
 987                         }
 988                 }
 989         }
 990         spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
 991 }
 992
 993 /*
 994  * vmbus_ongpadl_torndown - GPADL torndown handler.
 995  *
 996  * This is invoked when we received a response to our gpadl teardown request.
 997  * Find the matching request, copy the response and signal the requesting
 998  * thread.
 999  */
1000 static void vmbus_ongpadl_torndown(
1001                         struct vmbus_channel_message_header *hdr)
1002 {
1003         struct vmbus_channel_gpadl_torndown *gpadl_torndown;
1004         struct vmbus_channel_msginfo *msginfo;
1005         struct vmbus_channel_message_header *requestheader;
1006         struct vmbus_channel_gpadl_teardown *gpadl_teardown;
1007         unsigned long flags;
1008
1009         gpadl_torndown = (struct vmbus_channel_gpadl_torndown *)hdr;
1010
1011         /*
1012          * Find the open msg, copy the result and signal/unblock the wait event
1013          */
1014         spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
1015
1016         list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
1017                                 msglistentry) {
1018                 requestheader =
1019                         (struct vmbus_channel_message_header *)msginfo->msg;
1020
1021                 if (requestheader->msgtype == CHANNELMSG_GPADL_TEARDOWN) {
1022                         gpadl_teardown =
1023                         (struct vmbus_channel_gpadl_teardown *)requestheader;
1024
1025                         if (gpadl_torndown->gpadl == gpadl_teardown->gpadl) {
1026                                 memcpy(&msginfo->response.gpadl_torndown,
1027                                        gpadl_torndown,
1028                                        sizeof(
1029                                         struct vmbus_channel_gpadl_torndown));
1030                                 complete(&msginfo->waitevent);
1031                                 break;
1032                         }
1033                 }
1034         }
1035         spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
1036 }
1037
1038 /*
1039  * vmbus_onversion_response - Version response handler
1040  *
1041  * This is invoked when we received a response to our initiate contact request.
1042  * Find the matching request, copy the response and signal the requesting
1043  * thread.
1044  */
1045 static void vmbus_onversion_response(
1046                 struct vmbus_channel_message_header *hdr)
1047 {
1048         struct vmbus_channel_msginfo *msginfo;
1049         struct vmbus_channel_message_header *requestheader;
1050         struct vmbus_channel_version_response *version_response;
1051         unsigned long flags;
1052
1053         version_response = (struct vmbus_channel_version_response *)hdr;
1054         spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
1055
1056         list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
1057                                 msglistentry) {
1058                 requestheader =
1059                         (struct vmbus_channel_message_header *)msginfo->msg;
1060
1061                 if (requestheader->msgtype ==
1062                     CHANNELMSG_INITIATE_CONTACT) {
1063                         memcpy(&msginfo->response.version_response,
1064                               version_response,
1065                               sizeof(struct vmbus_channel_version_response));
1066                         complete(&msginfo->waitevent);
1067                 }
1068         }
1069         spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
1070 }
1071
1072 /* Channel message dispatch table */
1073 struct vmbus_channel_message_table_entry
1074         channel_message_table[CHANNELMSG_COUNT] = {
1075         {CHANNELMSG_INVALID,                    0, NULL},
1076         {CHANNELMSG_OFFERCHANNEL,               0, vmbus_onoffer},
1077         {CHANNELMSG_RESCIND_CHANNELOFFER,       0, vmbus_onoffer_rescind},
1078         {CHANNELMSG_REQUESTOFFERS,              0, NULL},
1079         {CHANNELMSG_ALLOFFERS_DELIVERED,        1, vmbus_onoffers_delivered},
1080         {CHANNELMSG_OPENCHANNEL,                0, NULL},
1081         {CHANNELMSG_OPENCHANNEL_RESULT,         1, vmbus_onopen_result},
1082         {CHANNELMSG_CLOSECHANNEL,               0, NULL},
1083         {CHANNELMSG_GPADL_HEADER,               0, NULL},
1084         {CHANNELMSG_GPADL_BODY,                 0, NULL},
1085         {CHANNELMSG_GPADL_CREATED,              1, vmbus_ongpadl_created},
1086         {CHANNELMSG_GPADL_TEARDOWN,             0, NULL},
1087         {CHANNELMSG_GPADL_TORNDOWN,             1, vmbus_ongpadl_torndown},
1088         {CHANNELMSG_RELID_RELEASED,             0, NULL},
1089         {CHANNELMSG_INITIATE_CONTACT,           0, NULL},
1090         {CHANNELMSG_VERSION_RESPONSE,           1, vmbus_onversion_response},
1091         {CHANNELMSG_UNLOAD,                     0, NULL},
1092         {CHANNELMSG_UNLOAD_RESPONSE,            1, vmbus_unload_response},
1093         {CHANNELMSG_18,                         0, NULL},
1094         {CHANNELMSG_19,                         0, NULL},
1095         {CHANNELMSG_20,                         0, NULL},
1096         {CHANNELMSG_TL_CONNECT_REQUEST,         0, NULL},
1097 };
1098
1099 /*
1100  * vmbus_onmessage - Handler for channel protocol messages.
1101  *
1102  * This is invoked in the vmbus worker thread context.
1103  */
1104 void vmbus_onmessage(void *context)
1105 {
1106         struct hv_message *msg = context;
1107         struct vmbus_channel_message_header *hdr;
1108         int size;
1109
1110         hdr = (struct vmbus_channel_message_header *)msg->u.payload;
1111         size = msg->header.payload_size;
1112
1113         if (hdr->msgtype >= CHANNELMSG_COUNT) {
1114                 pr_err("Received invalid channel message type %d size %d\n",
1115                            hdr->msgtype, size);
1116                 print_hex_dump_bytes("", DUMP_PREFIX_NONE,
1117                                      (unsigned char *)msg->u.payload, size);
1118                 return;
1119         }
1120
1121         if (channel_message_table[hdr->msgtype].message_handler)
1122                 channel_message_table[hdr->msgtype].message_handler(hdr);
1123         else
1124                 pr_err("Unhandled channel message type %d\n", hdr->msgtype);
1125 }
1126
1127 /*
1128  * vmbus_request_offers - Send a request to get all our pending offers.
1129  */
1130 int vmbus_request_offers(void)
1131 {
1132         struct vmbus_channel_message_header *msg;
1133         struct vmbus_channel_msginfo *msginfo;
1134         int ret;
1135
1136         msginfo = kmalloc(sizeof(*msginfo) +
1137                           sizeof(struct vmbus_channel_message_header),
1138                           GFP_KERNEL);
1139         if (!msginfo)
1140                 return -ENOMEM;
1141
1142         msg = (struct vmbus_channel_message_header *)msginfo->msg;
1143
1144         msg->msgtype = CHANNELMSG_REQUESTOFFERS;
1145
1146
1147         ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_message_header),
1148                              true);
1149         if (ret != 0) {
1150                 pr_err("Unable to request offers - %d\n", ret);
1151
1152                 goto cleanup;
1153         }
1154
1155 cleanup:
1156         kfree(msginfo);
1157
1158         return ret;
1159 }
1160
1161 /*
1162  * Retrieve the (sub) channel on which to send an outgoing request.
1163  * When a primary channel has multiple sub-channels, we try to
1164  * distribute the load equally amongst all available channels.
1165  */
1166 struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary)
1167 {
1168         struct list_head *cur, *tmp;
1169         int cur_cpu;
1170         struct vmbus_channel *cur_channel;
1171         struct vmbus_channel *outgoing_channel = primary;
1172         int next_channel;
1173         int i = 1;
1174
1175         if (list_empty(&primary->sc_list))
1176                 return outgoing_channel;
1177
1178         next_channel = primary->next_oc++;
1179
1180         if (next_channel > (primary->num_sc)) {
1181                 primary->next_oc = 0;
1182                 return outgoing_channel;
1183         }
1184
1185         cur_cpu = hv_context.vp_index[get_cpu()];
1186         put_cpu();
1187         list_for_each_safe(cur, tmp, &primary->sc_list) {
1188                 cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
1189                 if (cur_channel->state != CHANNEL_OPENED_STATE)
1190                         continue;
1191
1192                 if (cur_channel->target_vp == cur_cpu)
1193                         return cur_channel;
1194
1195                 if (i == next_channel)
1196                         return cur_channel;
1197
1198                 i++;
1199         }
1200
1201         return outgoing_channel;
1202 }
1203 EXPORT_SYMBOL_GPL(vmbus_get_outgoing_channel);
1204
1205 static void invoke_sc_cb(struct vmbus_channel *primary_channel)
1206 {
1207         struct list_head *cur, *tmp;
1208         struct vmbus_channel *cur_channel;
1209
1210         if (primary_channel->sc_creation_callback == NULL)
1211                 return;
1212
1213         list_for_each_safe(cur, tmp, &primary_channel->sc_list) {
1214                 cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
1215
1216                 primary_channel->sc_creation_callback(cur_channel);
1217         }
1218 }
1219
1220 void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel,
1221                                 void (*sc_cr_cb)(struct vmbus_channel *new_sc))
1222 {
1223         primary_channel->sc_creation_callback = sc_cr_cb;
1224 }
1225 EXPORT_SYMBOL_GPL(vmbus_set_sc_create_callback);
1226
1227 bool vmbus_are_subchannels_present(struct vmbus_channel *primary)
1228 {
1229         bool ret;
1230
1231         ret = !list_empty(&primary->sc_list);
1232
1233         if (ret) {
1234                 /*
1235                  * Invoke the callback on sub-channel creation.
1236                  * This will present a uniform interface to the
1237                  * clients.
1238                  */
1239                 invoke_sc_cb(primary);
1240         }
1241
1242         return ret;
1243 }
1244 EXPORT_SYMBOL_GPL(vmbus_are_subchannels_present);
1245
1246 void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel,
1247                 void (*chn_rescind_cb)(struct vmbus_channel *))
1248 {
1249         channel->chn_rescind_callback = chn_rescind_cb;
1250 }
1251 EXPORT_SYMBOL_GPL(vmbus_set_chn_rescind_callback);