drivers/hv/channel_mgmt.c

   1 /*
   2  * Copyright (c) 2009, Microsoft Corporation.
   3  *
   4  * This program is free software; you can redistribute it and/or modify it
   5  * under the terms and conditions of the GNU General Public License,
   6  * version 2, as published by the Free Software Foundation.
   7  *
   8  * This program is distributed in the hope it will be useful, but WITHOUT
   9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  11  * more details.
  12  *
  13  * You should have received a copy of the GNU General Public License along with
  14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
  15  * Place - Suite 330, Boston, MA 02111-1307 USA.
  16  *
  17  * Authors:
  18  *   Haiyang Zhang <haiyangz@microsoft.com>
  19  *   Hank Janssen  <hjanssen@microsoft.com>
  20  */
  21 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  22
  23 #include <linux/kernel.h>
  24 #include <linux/sched.h>
  25 #include <linux/wait.h>
  26 #include <linux/mm.h>
  27 #include <linux/slab.h>
  28 #include <linux/list.h>
  29 #include <linux/module.h>
  30 #include <linux/completion.h>
  31 #include <linux/hyperv.h>
  32
  33 #include "hyperv_vmbus.h"
  34
  35 static void init_vp_index(struct vmbus_channel *channel,
  36                           const uuid_le *type_guid);
  37
  38 /**
  39  * vmbus_prep_negotiate_resp() - Create default response for Hyper-V Negotiate message
  40  * @icmsghdrp: Pointer to msg header structure
  41  * @icmsg_negotiate: Pointer to negotiate message structure
  42  * @buf: Raw buffer channel data
  43  *
  44  * @icmsghdrp is of type &struct icmsg_hdr.
  45  * @negop is of type &struct icmsg_negotiate.
  46  * Set up and fill in default negotiate response message.
  47  *
  48  * The fw_version specifies the  framework version that
  49  * we can support and srv_version specifies the service
  50  * version we can support.
  51  *
  52  * Mainly used by Hyper-V drivers.
  53  */
  54 bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp,
  55                                 struct icmsg_negotiate *negop, u8 *buf,
  56                                 int fw_version, int srv_version)
  57 {
  58         int icframe_major, icframe_minor;
  59         int icmsg_major, icmsg_minor;
  60         int fw_major, fw_minor;
  61         int srv_major, srv_minor;
  62         int i;
  63         bool found_match = false;
  64
  65         icmsghdrp->icmsgsize = 0x10;
  66         fw_major = (fw_version >> 16);
  67         fw_minor = (fw_version & 0xFFFF);
  68
  69         srv_major = (srv_version >> 16);
  70         srv_minor = (srv_version & 0xFFFF);
  71
  72         negop = (struct icmsg_negotiate *)&buf[
  73                 sizeof(struct vmbuspipe_hdr) +
  74                 sizeof(struct icmsg_hdr)];
  75
  76         icframe_major = negop->icframe_vercnt;
  77         icframe_minor = 0;
  78
  79         icmsg_major = negop->icmsg_vercnt;
  80         icmsg_minor = 0;
  81
  82         /*
  83          * Select the framework version number we will
  84          * support.
  85          */
  86
  87         for (i = 0; i < negop->icframe_vercnt; i++) {
  88                 if ((negop->icversion_data[i].major == fw_major) &&
  89                    (negop->icversion_data[i].minor == fw_minor)) {
  90                         icframe_major = negop->icversion_data[i].major;
  91                         icframe_minor = negop->icversion_data[i].minor;
  92                         found_match = true;
  93                 }
  94         }
  95
  96         if (!found_match)
  97                 goto fw_error;
  98
  99         found_match = false;
 100
 101         for (i = negop->icframe_vercnt;
 102                  (i < negop->icframe_vercnt + negop->icmsg_vercnt); i++) {
 103                 if ((negop->icversion_data[i].major == srv_major) &&
 104                    (negop->icversion_data[i].minor == srv_minor)) {
 105                         icmsg_major = negop->icversion_data[i].major;
 106                         icmsg_minor = negop->icversion_data[i].minor;
 107                         found_match = true;
 108                 }
 109         }
 110
 111         /*
 112          * Respond with the framework and service
 113          * version numbers we can support.
 114          */
 115
 116 fw_error:
 117         if (!found_match) {
 118                 negop->icframe_vercnt = 0;
 119                 negop->icmsg_vercnt = 0;
 120         } else {
 121                 negop->icframe_vercnt = 1;
 122                 negop->icmsg_vercnt = 1;
 123         }
 124
 125         negop->icversion_data[0].major = icframe_major;
 126         negop->icversion_data[0].minor = icframe_minor;
 127         negop->icversion_data[1].major = icmsg_major;
 128         negop->icversion_data[1].minor = icmsg_minor;
 129         return found_match;
 130 }
 131
 132 EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp);
 133
 134 /*
 135  * alloc_channel - Allocate and initialize a vmbus channel object
 136  */
 137 static struct vmbus_channel *alloc_channel(void)
 138 {
 139         static atomic_t chan_num = ATOMIC_INIT(0);
 140         struct vmbus_channel *channel;
 141
 142         channel = kzalloc(sizeof(*channel), GFP_ATOMIC);
 143         if (!channel)
 144                 return NULL;
 145
 146         channel->id = atomic_inc_return(&chan_num);
 147         spin_lock_init(&channel->inbound_lock);
 148         spin_lock_init(&channel->lock);
 149
 150         INIT_LIST_HEAD(&channel->sc_list);
 151         INIT_LIST_HEAD(&channel->percpu_list);
 152
 153         return channel;
 154 }
 155
 156 /*
 157  * free_channel - Release the resources used by the vmbus channel object
 158  */
 159 static void free_channel(struct vmbus_channel *channel)
 160 {
 161         kfree(channel);
 162 }
 163
 164 static void percpu_channel_enq(void *arg)
 165 {
 166         struct vmbus_channel *channel = arg;
 167         int cpu = smp_processor_id();
 168
 169         list_add_tail(&channel->percpu_list, &hv_context.percpu_list[cpu]);
 170 }
 171
 172 static void percpu_channel_deq(void *arg)
 173 {
 174         struct vmbus_channel *channel = arg;
 175
 176         list_del(&channel->percpu_list);
 177 }
 178
 179
 180 void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
 181 {
 182         struct vmbus_channel_relid_released msg;
 183         unsigned long flags;
 184         struct vmbus_channel *primary_channel;
 185
 186         memset(&msg, 0, sizeof(struct vmbus_channel_relid_released));
 187         msg.child_relid = relid;
 188         msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
 189         vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released));
 190
 191         if (channel == NULL)
 192                 return;
 193
 194         if (channel->target_cpu != get_cpu()) {
 195                 put_cpu();
 196                 smp_call_function_single(channel->target_cpu,
 197                                          percpu_channel_deq, channel, true);
 198         } else {
 199                 percpu_channel_deq(channel);
 200                 put_cpu();
 201         }
 202
 203         if (channel->primary_channel == NULL) {
 204                 spin_lock_irqsave(&vmbus_connection.channel_lock, flags);
 205                 list_del(&channel->listentry);
 206                 spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);
 207
 208                 primary_channel = channel;
 209         } else {
 210                 primary_channel = channel->primary_channel;
 211                 spin_lock_irqsave(&primary_channel->lock, flags);
 212                 list_del(&channel->sc_list);
 213                 primary_channel->num_sc--;
 214                 spin_unlock_irqrestore(&primary_channel->lock, flags);
 215         }
 216
 217         /*
 218          * We need to free the bit for init_vp_index() to work in the case
 219          * of sub-channel, when we reload drivers like hv_netvsc.
 220          */
 221         cpumask_clear_cpu(channel->target_cpu,
 222                           &primary_channel->alloced_cpus_in_node);
 223
 224         free_channel(channel);
 225 }
 226
 227 void vmbus_free_channels(void)
 228 {
 229         struct vmbus_channel *channel, *tmp;
 230
 231         list_for_each_entry_safe(channel, tmp, &vmbus_connection.chn_list,
 232                 listentry) {
 233                 /* if we don't set rescind to true, vmbus_close_internal()
 234                  * won't invoke hv_process_channel_removal().
 235                  */
 236                 channel->rescind = true;
 237
 238                 vmbus_device_unregister(channel->device_obj);
 239         }
 240 }
 241
 242 /*
 243  * vmbus_process_offer - Process the offer by creating a channel/device
 244  * associated with this offer
 245  */
 246 static void vmbus_process_offer(struct vmbus_channel *newchannel)
 247 {
 248         struct vmbus_channel *channel;
 249         bool fnew = true;
 250         unsigned long flags;
 251
 252         /* Make sure this is a new offer */
 253         spin_lock_irqsave(&vmbus_connection.channel_lock, flags);
 254
 255         list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
 256                 if (!uuid_le_cmp(channel->offermsg.offer.if_type,
 257                         newchannel->offermsg.offer.if_type) &&
 258                         !uuid_le_cmp(channel->offermsg.offer.if_instance,
 259                                 newchannel->offermsg.offer.if_instance)) {
 260                         fnew = false;
 261                         break;
 262                 }
 263         }
 264
 265         if (fnew)
 266                 list_add_tail(&newchannel->listentry,
 267                               &vmbus_connection.chn_list);
 268
 269         spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);
 270
 271         if (!fnew) {
 272                 /*
 273                  * Check to see if this is a sub-channel.
 274                  */
 275                 if (newchannel->offermsg.offer.sub_channel_index != 0) {
 276                         /*
 277                          * Process the sub-channel.
 278                          */
 279                         newchannel->primary_channel = channel;
 280                         spin_lock_irqsave(&channel->lock, flags);
 281                         list_add_tail(&newchannel->sc_list, &channel->sc_list);
 282                         channel->num_sc++;
 283                         spin_unlock_irqrestore(&channel->lock, flags);
 284                 } else
 285                         goto err_free_chan;
 286         }
 287
 288         init_vp_index(newchannel, &newchannel->offermsg.offer.if_type);
 289
 290         if (newchannel->target_cpu != get_cpu()) {
 291                 put_cpu();
 292                 smp_call_function_single(newchannel->target_cpu,
 293                                          percpu_channel_enq,
 294                                          newchannel, true);
 295         } else {
 296                 percpu_channel_enq(newchannel);
 297                 put_cpu();
 298         }
 299
 300         /*
 301          * This state is used to indicate a successful open
 302          * so that when we do close the channel normally, we
 303          * can cleanup properly
 304          */
 305         newchannel->state = CHANNEL_OPEN_STATE;
 306
 307         if (!fnew) {
 308                 if (channel->sc_creation_callback != NULL)
 309                         channel->sc_creation_callback(newchannel);
 310                 return;
 311         }
 312
 313         /*
 314          * Start the process of binding this offer to the driver
 315          * We need to set the DeviceObject field before calling
 316          * vmbus_child_dev_add()
 317          */
 318         newchannel->device_obj = vmbus_device_create(
 319                 &newchannel->offermsg.offer.if_type,
 320                 &newchannel->offermsg.offer.if_instance,
 321                 newchannel);
 322         if (!newchannel->device_obj)
 323                 goto err_deq_chan;
 324
 325         /*
 326          * Add the new device to the bus. This will kick off device-driver
 327          * binding which eventually invokes the device driver's AddDevice()
 328          * method.
 329          */
 330         if (vmbus_device_register(newchannel->device_obj) != 0) {
 331                 pr_err("unable to add child device object (relid %d)\n",
 332                         newchannel->offermsg.child_relid);
 333                 kfree(newchannel->device_obj);
 334                 goto err_deq_chan;
 335         }
 336         return;
 337
 338 err_deq_chan:
 339         spin_lock_irqsave(&vmbus_connection.channel_lock, flags);
 340         list_del(&newchannel->listentry);
 341         spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);
 342
 343         if (newchannel->target_cpu != get_cpu()) {
 344                 put_cpu();
 345                 smp_call_function_single(newchannel->target_cpu,
 346                                          percpu_channel_deq, newchannel, true);
 347         } else {
 348                 percpu_channel_deq(newchannel);
 349                 put_cpu();
 350         }
 351
 352 err_free_chan:
 353         free_channel(newchannel);
 354 }
 355
 356 enum {
 357         IDE = 0,
 358         SCSI,
 359         NIC,
 360         ND_NIC,
 361         PCIE,
 362         MAX_PERF_CHN,
 363 };
 364
 365 /*
 366  * This is an array of device_ids (device types) that are performance critical.
 367  * We attempt to distribute the interrupt load for these devices across
 368  * all available CPUs.
 369  */
 370 static const struct hv_vmbus_device_id hp_devs[] = {
 371         /* IDE */
 372         { HV_IDE_GUID, },
 373         /* Storage - SCSI */
 374         { HV_SCSI_GUID, },
 375         /* Network */
 376         { HV_NIC_GUID, },
 377         /* NetworkDirect Guest RDMA */
 378         { HV_ND_GUID, },
 379         /* PCI Express Pass Through */
 380         { HV_PCIE_GUID, },
 381 };
 382
 383
 384 /*
 385  * We use this state to statically distribute the channel interrupt load.
 386  */
 387 static int next_numa_node_id;
 388
 389 /*
 390  * Starting with Win8, we can statically distribute the incoming
 391  * channel interrupt load by binding a channel to VCPU.
 392  * We do this in a hierarchical fashion:
 393  * First distribute the primary channels across available NUMA nodes
 394  * and then distribute the subchannels amongst the CPUs in the NUMA
 395  * node assigned to the primary channel.
 396  *
 397  * For pre-win8 hosts or non-performance critical channels we assign the
 398  * first CPU in the first NUMA node.
 399  */
 400 static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_guid)
 401 {
 402         u32 cur_cpu;
 403         int i;
 404         bool perf_chn = false;
 405         struct vmbus_channel *primary = channel->primary_channel;
 406         int next_node;
 407         struct cpumask available_mask;
 408         struct cpumask *alloced_mask;
 409
 410         for (i = IDE; i < MAX_PERF_CHN; i++) {
 411                 if (!memcmp(type_guid->b, &hp_devs[i].guid,
 412                                  sizeof(uuid_le))) {
 413                         perf_chn = true;
 414                         break;
 415                 }
 416         }
 417         if ((vmbus_proto_version == VERSION_WS2008) ||
 418             (vmbus_proto_version == VERSION_WIN7) || (!perf_chn)) {
 419                 /*
 420                  * Prior to win8, all channel interrupts are
 421                  * delivered on cpu 0.
 422                  * Also if the channel is not a performance critical
 423                  * channel, bind it to cpu 0.
 424                  */
 425                 channel->numa_node = 0;
 426                 channel->target_cpu = 0;
 427                 channel->target_vp = hv_context.vp_index[0];
 428                 return;
 429         }
 430
 431         /*
 432          * We distribute primary channels evenly across all the available
 433          * NUMA nodes and within the assigned NUMA node we will assign the
 434          * first available CPU to the primary channel.
 435          * The sub-channels will be assigned to the CPUs available in the
 436          * NUMA node evenly.
 437          */
 438         if (!primary) {
 439                 while (true) {
 440                         next_node = next_numa_node_id++;
 441                         if (next_node == nr_node_ids)
 442                                 next_node = next_numa_node_id = 0;
 443                         if (cpumask_empty(cpumask_of_node(next_node)))
 444                                 continue;
 445                         break;
 446                 }
 447                 channel->numa_node = next_node;
 448                 primary = channel;
 449         }
 450         alloced_mask = &hv_context.hv_numa_map[primary->numa_node];
 451
 452         if (cpumask_weight(alloced_mask) ==
 453             cpumask_weight(cpumask_of_node(primary->numa_node))) {
 454                 /*
 455                  * We have cycled through all the CPUs in the node;
 456                  * reset the alloced map.
 457                  */
 458                 cpumask_clear(alloced_mask);
 459         }
 460
 461         cpumask_xor(&available_mask, alloced_mask,
 462                     cpumask_of_node(primary->numa_node));
 463
 464         cur_cpu = -1;
 465         while (true) {
 466                 cur_cpu = cpumask_next(cur_cpu, &available_mask);
 467                 if (cur_cpu >= nr_cpu_ids) {
 468                         cur_cpu = -1;
 469                         cpumask_copy(&available_mask,
 470                                      cpumask_of_node(primary->numa_node));
 471                         continue;
 472                 }
 473
 474                 /*
 475                  * NOTE: in the case of sub-channel, we clear the sub-channel
 476                  * related bit(s) in primary->alloced_cpus_in_node in
 477                  * hv_process_channel_removal(), so when we reload drivers
 478                  * like hv_netvsc in SMP guest, here we're able to re-allocate
 479                  * bit from primary->alloced_cpus_in_node.
 480                  */
 481                 if (!cpumask_test_cpu(cur_cpu,
 482                                 &primary->alloced_cpus_in_node)) {
 483                         cpumask_set_cpu(cur_cpu,
 484                                         &primary->alloced_cpus_in_node);
 485                         cpumask_set_cpu(cur_cpu, alloced_mask);
 486                         break;
 487                 }
 488         }
 489
 490         channel->target_cpu = cur_cpu;
 491         channel->target_vp = hv_context.vp_index[cur_cpu];
 492 }
 493
 494 /*
 495  * vmbus_unload_response - Handler for the unload response.
 496  */
 497 static void vmbus_unload_response(struct vmbus_channel_message_header *hdr)
 498 {
 499         /*
 500          * This is a global event; just wakeup the waiting thread.
 501          * Once we successfully unload, we can cleanup the monitor state.
 502          */
 503         complete(&vmbus_connection.unload_event);
 504 }
 505
 506 void vmbus_initiate_unload(void)
 507 {
 508         struct vmbus_channel_message_header hdr;
 509
 510         /* Pre-Win2012R2 hosts don't support reconnect */
 511         if (vmbus_proto_version < VERSION_WIN8_1)
 512                 return;
 513
 514         init_completion(&vmbus_connection.unload_event);
 515         memset(&hdr, 0, sizeof(struct vmbus_channel_message_header));
 516         hdr.msgtype = CHANNELMSG_UNLOAD;
 517         vmbus_post_msg(&hdr, sizeof(struct vmbus_channel_message_header));
 518
 519         wait_for_completion(&vmbus_connection.unload_event);
 520 }
 521
 522 /*
 523  * vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
 524  *
 525  */
 526 static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
 527 {
 528         struct vmbus_channel_offer_channel *offer;
 529         struct vmbus_channel *newchannel;
 530
 531         offer = (struct vmbus_channel_offer_channel *)hdr;
 532
 533         /* Allocate the channel object and save this offer. */
 534         newchannel = alloc_channel();
 535         if (!newchannel) {
 536                 pr_err("Unable to allocate channel object\n");
 537                 return;
 538         }
 539
 540         /*
 541          * By default we setup state to enable batched
 542          * reading. A specific service can choose to
 543          * disable this prior to opening the channel.
 544          */
 545         newchannel->batched_reading = true;
 546
 547         /*
 548          * Setup state for signalling the host.
 549          */
 550         newchannel->sig_event = (struct hv_input_signal_event *)
 551                                 (ALIGN((unsigned long)
 552                                 &newchannel->sig_buf,
 553                                 HV_HYPERCALL_PARAM_ALIGN));
 554
 555         newchannel->sig_event->connectionid.asu32 = 0;
 556         newchannel->sig_event->connectionid.u.id = VMBUS_EVENT_CONNECTION_ID;
 557         newchannel->sig_event->flag_number = 0;
 558         newchannel->sig_event->rsvdz = 0;
 559
 560         if (vmbus_proto_version != VERSION_WS2008) {
 561                 newchannel->is_dedicated_interrupt =
 562                                 (offer->is_dedicated_interrupt != 0);
 563                 newchannel->sig_event->connectionid.u.id =
 564                                 offer->connection_id;
 565         }
 566
 567         memcpy(&newchannel->offermsg, offer,
 568                sizeof(struct vmbus_channel_offer_channel));
 569         newchannel->monitor_grp = (u8)offer->monitorid / 32;
 570         newchannel->monitor_bit = (u8)offer->monitorid % 32;
 571
 572         vmbus_process_offer(newchannel);
 573 }
 574
 575 /*
 576  * vmbus_onoffer_rescind - Rescind offer handler.
 577  *
 578  * We queue a work item to process this offer synchronously
 579  */
 580 static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
 581 {
 582         struct vmbus_channel_rescind_offer *rescind;
 583         struct vmbus_channel *channel;
 584         unsigned long flags;
 585         struct device *dev;
 586
 587         rescind = (struct vmbus_channel_rescind_offer *)hdr;
 588         channel = relid2channel(rescind->child_relid);
 589
 590         if (channel == NULL) {
 591                 hv_process_channel_removal(NULL, rescind->child_relid);
 592                 return;
 593         }
 594
 595         spin_lock_irqsave(&channel->lock, flags);
 596         channel->rescind = true;
 597         spin_unlock_irqrestore(&channel->lock, flags);
 598
 599         if (channel->device_obj) {
 600                 /*
 601                  * We will have to unregister this device from the
 602                  * driver core.
 603                  */
 604                 dev = get_device(&channel->device_obj->device);
 605                 if (dev) {
 606                         vmbus_device_unregister(channel->device_obj);
 607                         put_device(dev);
 608                 }
 609         } else {
 610                 hv_process_channel_removal(channel,
 611                         channel->offermsg.child_relid);
 612         }
 613 }
 614
 615 /*
 616  * vmbus_onoffers_delivered -
 617  * This is invoked when all offers have been delivered.
 618  *
 619  * Nothing to do here.
 620  */
 621 static void vmbus_onoffers_delivered(
 622                         struct vmbus_channel_message_header *hdr)
 623 {
 624 }
 625
 626 /*
 627  * vmbus_onopen_result - Open result handler.
 628  *
 629  * This is invoked when we received a response to our channel open request.
 630  * Find the matching request, copy the response and signal the requesting
 631  * thread.
 632  */
 633 static void vmbus_onopen_result(struct vmbus_channel_message_header *hdr)
 634 {
 635         struct vmbus_channel_open_result *result;
 636         struct vmbus_channel_msginfo *msginfo;
 637         struct vmbus_channel_message_header *requestheader;
 638         struct vmbus_channel_open_channel *openmsg;
 639         unsigned long flags;
 640
 641         result = (struct vmbus_channel_open_result *)hdr;
 642
 643         /*
 644          * Find the open msg, copy the result and signal/unblock the wait event
 645          */
 646         spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
 647
 648         list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
 649                                 msglistentry) {
 650                 requestheader =
 651                         (struct vmbus_channel_message_header *)msginfo->msg;
 652
 653                 if (requestheader->msgtype == CHANNELMSG_OPENCHANNEL) {
 654                         openmsg =
 655                         (struct vmbus_channel_open_channel *)msginfo->msg;
 656                         if (openmsg->child_relid == result->child_relid &&
 657                             openmsg->openid == result->openid) {
 658                                 memcpy(&msginfo->response.open_result,
 659                                        result,
 660                                        sizeof(
 661                                         struct vmbus_channel_open_result));
 662                                 complete(&msginfo->waitevent);
 663                                 break;
 664                         }
 665                 }
 666         }
 667         spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
 668 }
 669
 670 /*
 671  * vmbus_ongpadl_created - GPADL created handler.
 672  *
 673  * This is invoked when we received a response to our gpadl create request.
 674  * Find the matching request, copy the response and signal the requesting
 675  * thread.
 676  */
 677 static void vmbus_ongpadl_created(struct vmbus_channel_message_header *hdr)
 678 {
 679         struct vmbus_channel_gpadl_created *gpadlcreated;
 680         struct vmbus_channel_msginfo *msginfo;
 681         struct vmbus_channel_message_header *requestheader;
 682         struct vmbus_channel_gpadl_header *gpadlheader;
 683         unsigned long flags;
 684
 685         gpadlcreated = (struct vmbus_channel_gpadl_created *)hdr;
 686
 687         /*
 688          * Find the establish msg, copy the result and signal/unblock the wait
 689          * event
 690          */
 691         spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
 692
 693         list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
 694                                 msglistentry) {
 695                 requestheader =
 696                         (struct vmbus_channel_message_header *)msginfo->msg;
 697
 698                 if (requestheader->msgtype == CHANNELMSG_GPADL_HEADER) {
 699                         gpadlheader =
 700                         (struct vmbus_channel_gpadl_header *)requestheader;
 701
 702                         if ((gpadlcreated->child_relid ==
 703                              gpadlheader->child_relid) &&
 704                             (gpadlcreated->gpadl == gpadlheader->gpadl)) {
 705                                 memcpy(&msginfo->response.gpadl_created,
 706                                        gpadlcreated,
 707                                        sizeof(
 708                                         struct vmbus_channel_gpadl_created));
 709                                 complete(&msginfo->waitevent);
 710                                 break;
 711                         }
 712                 }
 713         }
 714         spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
 715 }
 716
 717 /*
 718  * vmbus_ongpadl_torndown - GPADL torndown handler.
 719  *
 720  * This is invoked when we received a response to our gpadl teardown request.
 721  * Find the matching request, copy the response and signal the requesting
 722  * thread.
 723  */
 724 static void vmbus_ongpadl_torndown(
 725                         struct vmbus_channel_message_header *hdr)
 726 {
 727         struct vmbus_channel_gpadl_torndown *gpadl_torndown;
 728         struct vmbus_channel_msginfo *msginfo;
 729         struct vmbus_channel_message_header *requestheader;
 730         struct vmbus_channel_gpadl_teardown *gpadl_teardown;
 731         unsigned long flags;
 732
 733         gpadl_torndown = (struct vmbus_channel_gpadl_torndown *)hdr;
 734
 735         /*
 736          * Find the open msg, copy the result and signal/unblock the wait event
 737          */
 738         spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
 739
 740         list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
 741                                 msglistentry) {
 742                 requestheader =
 743                         (struct vmbus_channel_message_header *)msginfo->msg;
 744
 745                 if (requestheader->msgtype == CHANNELMSG_GPADL_TEARDOWN) {
 746                         gpadl_teardown =
 747                         (struct vmbus_channel_gpadl_teardown *)requestheader;
 748
 749                         if (gpadl_torndown->gpadl == gpadl_teardown->gpadl) {
 750                                 memcpy(&msginfo->response.gpadl_torndown,
 751                                        gpadl_torndown,
 752                                        sizeof(
 753                                         struct vmbus_channel_gpadl_torndown));
 754                                 complete(&msginfo->waitevent);
 755                                 break;
 756                         }
 757                 }
 758         }
 759         spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
 760 }
 761
 762 /*
 763  * vmbus_onversion_response - Version response handler
 764  *
 765  * This is invoked when we received a response to our initiate contact request.
 766  * Find the matching request, copy the response and signal the requesting
 767  * thread.
 768  */
 769 static void vmbus_onversion_response(
 770                 struct vmbus_channel_message_header *hdr)
 771 {
 772         struct vmbus_channel_msginfo *msginfo;
 773         struct vmbus_channel_message_header *requestheader;
 774         struct vmbus_channel_version_response *version_response;
 775         unsigned long flags;
 776
 777         version_response = (struct vmbus_channel_version_response *)hdr;
 778         spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
 779
 780         list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
 781                                 msglistentry) {
 782                 requestheader =
 783                         (struct vmbus_channel_message_header *)msginfo->msg;
 784
 785                 if (requestheader->msgtype ==
 786                     CHANNELMSG_INITIATE_CONTACT) {
 787                         memcpy(&msginfo->response.version_response,
 788                               version_response,
 789                               sizeof(struct vmbus_channel_version_response));
 790                         complete(&msginfo->waitevent);
 791                 }
 792         }
 793         spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
 794 }
 795
 796 /*
      * Channel message dispatch table.
      *
      * vmbus_onmessage() indexes this array directly with the msgtype of the
      * received message and calls the entry's message_handler, so every row
      * must sit at the index equal to its message type.
      * NOTE(review): this relies on the CHANNELMSG_* values being consecutive
      * in the order listed here - confirm against the enum definition.
      *
      * Rows with a NULL handler are reported by vmbus_onmessage() as
      * "Unhandled channel message type".
      *
      * NOTE(review): the meaning of the second field (0/1) is defined with
      * the entry type, outside this file section; vmbus_onmessage() does not
      * read it.
      */
 797 struct vmbus_channel_message_table_entry
 798         channel_message_table[CHANNELMSG_COUNT] = {
 799         {CHANNELMSG_INVALID,                    0, NULL},
 800         {CHANNELMSG_OFFERCHANNEL,               0, vmbus_onoffer},
 801         {CHANNELMSG_RESCIND_CHANNELOFFER,       0, vmbus_onoffer_rescind},
 802         {CHANNELMSG_REQUESTOFFERS,              0, NULL},
 803         {CHANNELMSG_ALLOFFERS_DELIVERED,        1, vmbus_onoffers_delivered},
 804         {CHANNELMSG_OPENCHANNEL,                0, NULL},
 805         {CHANNELMSG_OPENCHANNEL_RESULT,         1, vmbus_onopen_result},
 806         {CHANNELMSG_CLOSECHANNEL,               0, NULL},
 807         {CHANNELMSG_GPADL_HEADER,               0, NULL},
 808         {CHANNELMSG_GPADL_BODY,                 0, NULL},
 809         {CHANNELMSG_GPADL_CREATED,              1, vmbus_ongpadl_created},
 810         {CHANNELMSG_GPADL_TEARDOWN,             0, NULL},
 811         {CHANNELMSG_GPADL_TORNDOWN,             1, vmbus_ongpadl_torndown},
 812         {CHANNELMSG_RELID_RELEASED,             0, NULL},
 813         {CHANNELMSG_INITIATE_CONTACT,           0, NULL},
 814         {CHANNELMSG_VERSION_RESPONSE,           1, vmbus_onversion_response},
 815         {CHANNELMSG_UNLOAD,                     0, NULL},
 816         {CHANNELMSG_UNLOAD_RESPONSE,            1, vmbus_unload_response},
 817 };
 818
 819 /*
 820  * vmbus_onmessage - Handler for channel protocol messages.
 821  *
 822  * This is invoked in the vmbus worker thread context.
 823  */
 824 void vmbus_onmessage(void *context)
 825 {
 826         struct hv_message *msg = context;
 827         struct vmbus_channel_message_header *hdr;
 828         int size;
 829
 830         hdr = (struct vmbus_channel_message_header *)msg->u.payload;
 831         size = msg->header.payload_size;
 832
 833         if (hdr->msgtype >= CHANNELMSG_COUNT) {
 834                 pr_err("Received invalid channel message type %d size %d\n",
 835                            hdr->msgtype, size);
 836                 print_hex_dump_bytes("", DUMP_PREFIX_NONE,
 837                                      (unsigned char *)msg->u.payload, size);
 838                 return;
 839         }
 840
 841         if (channel_message_table[hdr->msgtype].message_handler)
 842                 channel_message_table[hdr->msgtype].message_handler(hdr);
 843         else
 844                 pr_err("Unhandled channel message type %d\n", hdr->msgtype);
 845 }
 846
 847 /*
 848  * vmbus_request_offers - Send a request to get all our pending offers.
 849  */
 850 int vmbus_request_offers(void)
 851 {
 852         struct vmbus_channel_message_header *msg;
 853         struct vmbus_channel_msginfo *msginfo;
 854         int ret;
 855
 856         msginfo = kmalloc(sizeof(*msginfo) +
 857                           sizeof(struct vmbus_channel_message_header),
 858                           GFP_KERNEL);
 859         if (!msginfo)
 860                 return -ENOMEM;
 861
 862         msg = (struct vmbus_channel_message_header *)msginfo->msg;
 863
 864         msg->msgtype = CHANNELMSG_REQUESTOFFERS;
 865
 866
 867         ret = vmbus_post_msg(msg,
 868                                sizeof(struct vmbus_channel_message_header));
 869         if (ret != 0) {
 870                 pr_err("Unable to request offers - %d\n", ret);
 871
 872                 goto cleanup;
 873         }
 874
 875 cleanup:
 876         kfree(msginfo);
 877
 878         return ret;
 879 }
 880
 881 /*
 882  * Retrieve the (sub) channel on which to send an outgoing request.
 883  * When a primary channel has multiple sub-channels, we try to
 884  * distribute the load equally amongst all available channels.
 885  */
 886 struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary)
 887 {
 888         struct list_head *cur, *tmp;
 889         int cur_cpu;
 890         struct vmbus_channel *cur_channel;
 891         struct vmbus_channel *outgoing_channel = primary;
 892         int next_channel;
 893         int i = 1;
 894
 895         if (list_empty(&primary->sc_list))
 896                 return outgoing_channel;
 897
 898         next_channel = primary->next_oc++;
 899
 900         if (next_channel > (primary->num_sc)) {
 901                 primary->next_oc = 0;
 902                 return outgoing_channel;
 903         }
 904
 905         cur_cpu = hv_context.vp_index[get_cpu()];
 906         put_cpu();
 907         list_for_each_safe(cur, tmp, &primary->sc_list) {
 908                 cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
 909                 if (cur_channel->state != CHANNEL_OPENED_STATE)
 910                         continue;
 911
 912                 if (cur_channel->target_vp == cur_cpu)
 913                         return cur_channel;
 914
 915                 if (i == next_channel)
 916                         return cur_channel;
 917
 918                 i++;
 919         }
 920
 921         return outgoing_channel;
 922 }
 923 EXPORT_SYMBOL_GPL(vmbus_get_outgoing_channel);
 924
 925 static void invoke_sc_cb(struct vmbus_channel *primary_channel)
 926 {
 927         struct list_head *cur, *tmp;
 928         struct vmbus_channel *cur_channel;
 929
 930         if (primary_channel->sc_creation_callback == NULL)
 931                 return;
 932
 933         list_for_each_safe(cur, tmp, &primary_channel->sc_list) {
 934                 cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
 935
 936                 primary_channel->sc_creation_callback(cur_channel);
 937         }
 938 }
 939
 940 void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel,
 941                                 void (*sc_cr_cb)(struct vmbus_channel *new_sc))
 942 {
 943         primary_channel->sc_creation_callback = sc_cr_cb;
 944 }
 945 EXPORT_SYMBOL_GPL(vmbus_set_sc_create_callback);
 946
 947 bool vmbus_are_subchannels_present(struct vmbus_channel *primary)
 948 {
 949         bool ret;
 950
 951         ret = !list_empty(&primary->sc_list);
 952
 953         if (ret) {
 954                 /*
 955                  * Invoke the callback on sub-channel creation.
 956                  * This will present a uniform interface to the
 957                  * clients.
 958                  */
 959                 invoke_sc_cb(primary);
 960         }
 961
 962         return ret;
 963 }
 964 EXPORT_SYMBOL_GPL(vmbus_are_subchannels_present);