drivers/hv/channel_mgmt.c

   1 /*
   2  * Copyright (c) 2009, Microsoft Corporation.
   3  *
   4  * This program is free software; you can redistribute it and/or modify it
   5  * under the terms and conditions of the GNU General Public License,
   6  * version 2, as published by the Free Software Foundation.
   7  *
   8  * This program is distributed in the hope it will be useful, but WITHOUT
   9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  11  * more details.
  12  *
  13  * You should have received a copy of the GNU General Public License along with
  14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
  15  * Place - Suite 330, Boston, MA 02111-1307 USA.
  16  *
  17  * Authors:
  18  *   Haiyang Zhang <haiyangz@microsoft.com>
  19  *   Hank Janssen  <hjanssen@microsoft.com>
  20  */
  21 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  22
  23 #include <linux/kernel.h>
  24 #include <linux/interrupt.h>
  25 #include <linux/sched.h>
  26 #include <linux/wait.h>
  27 #include <linux/mm.h>
  28 #include <linux/slab.h>
  29 #include <linux/list.h>
  30 #include <linux/module.h>
  31 #include <linux/completion.h>
  32 #include <linux/delay.h>
  33 #include <linux/hyperv.h>
  34 #include <asm/mshyperv.h>
  35
  36 #include "hyperv_vmbus.h"
  37
  38 static void init_vp_index(struct vmbus_channel *channel, u16 dev_type);
  39
  40 static const struct vmbus_device vmbus_devs[] = {
  41         /* IDE */
  42         { .dev_type = HV_IDE,
  43           HV_IDE_GUID,
  44           .perf_device = true,
  45         },
  46
  47         /* SCSI */
  48         { .dev_type = HV_SCSI,
  49           HV_SCSI_GUID,
  50           .perf_device = true,
  51         },
  52
  53         /* Fibre Channel */
  54         { .dev_type = HV_FC,
  55           HV_SYNTHFC_GUID,
  56           .perf_device = true,
  57         },
  58
  59         /* Synthetic NIC */
  60         { .dev_type = HV_NIC,
  61           HV_NIC_GUID,
  62           .perf_device = true,
  63         },
  64
  65         /* Network Direct */
  66         { .dev_type = HV_ND,
  67           HV_ND_GUID,
  68           .perf_device = true,
  69         },
  70
  71         /* PCIE */
  72         { .dev_type = HV_PCIE,
  73           HV_PCIE_GUID,
  74           .perf_device = true,
  75         },
  76
  77         /* Synthetic Frame Buffer */
  78         { .dev_type = HV_FB,
  79           HV_SYNTHVID_GUID,
  80           .perf_device = false,
  81         },
  82
  83         /* Synthetic Keyboard */
  84         { .dev_type = HV_KBD,
  85           HV_KBD_GUID,
  86           .perf_device = false,
  87         },
  88
  89         /* Synthetic MOUSE */
  90         { .dev_type = HV_MOUSE,
  91           HV_MOUSE_GUID,
  92           .perf_device = false,
  93         },
  94
  95         /* KVP */
  96         { .dev_type = HV_KVP,
  97           HV_KVP_GUID,
  98           .perf_device = false,
  99         },
 100
 101         /* Time Synch */
 102         { .dev_type = HV_TS,
 103           HV_TS_GUID,
 104           .perf_device = false,
 105         },
 106
 107         /* Heartbeat */
 108         { .dev_type = HV_HB,
 109           HV_HEART_BEAT_GUID,
 110           .perf_device = false,
 111         },
 112
 113         /* Shutdown */
 114         { .dev_type = HV_SHUTDOWN,
 115           HV_SHUTDOWN_GUID,
 116           .perf_device = false,
 117         },
 118
 119         /* File copy */
 120         { .dev_type = HV_FCOPY,
 121           HV_FCOPY_GUID,
 122           .perf_device = false,
 123         },
 124
 125         /* Backup */
 126         { .dev_type = HV_BACKUP,
 127           HV_VSS_GUID,
 128           .perf_device = false,
 129         },
 130
 131         /* Dynamic Memory */
 132         { .dev_type = HV_DM,
 133           HV_DM_GUID,
 134           .perf_device = false,
 135         },
 136
 137         /* Unknown GUID */
 138         { .dev_type = HV_UNKNOWN,
 139           .perf_device = false,
 140         },
 141 };
 142
 143 static const struct {
 144         uuid_le guid;
 145 } vmbus_unsupported_devs[] = {
 146         { HV_AVMA1_GUID },
 147         { HV_AVMA2_GUID },
 148         { HV_RDV_GUID   },
 149 };
 150
 151 /*
 152  * The rescinded channel may be blocked waiting for a response from the host;
 153  * take care of that.
 154  */
 155 static void vmbus_rescind_cleanup(struct vmbus_channel *channel)
 156 {
 157         struct vmbus_channel_msginfo *msginfo;
 158         unsigned long flags;
 159
 160
 161         spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
 162
 163         list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
 164                                 msglistentry) {
 165
 166                 if (msginfo->waiting_channel == channel) {
 167                         complete(&msginfo->waitevent);
 168                         break;
 169                 }
 170         }
 171         spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
 172 }
 173
 174 static bool is_unsupported_vmbus_devs(const uuid_le *guid)
 175 {
 176         int i;
 177
 178         for (i = 0; i < ARRAY_SIZE(vmbus_unsupported_devs); i++)
 179                 if (!uuid_le_cmp(*guid, vmbus_unsupported_devs[i].guid))
 180                         return true;
 181         return false;
 182 }
 183
 184 static u16 hv_get_dev_type(const struct vmbus_channel *channel)
 185 {
 186         const uuid_le *guid = &channel->offermsg.offer.if_type;
 187         u16 i;
 188
 189         if (is_hvsock_channel(channel) || is_unsupported_vmbus_devs(guid))
 190                 return HV_UNKNOWN;
 191
 192         for (i = HV_IDE; i < HV_UNKNOWN; i++) {
 193                 if (!uuid_le_cmp(*guid, vmbus_devs[i].guid))
 194                         return i;
 195         }
 196         pr_info("Unknown GUID: %pUl\n", guid);
 197         return i;
 198 }
 199
 200 /**
 201  * vmbus_prep_negotiate_resp() - Create default response for Hyper-V Negotiate message
 202  * @icmsghdrp: Pointer to msg header structure
 203  * @icmsg_negotiate: Pointer to negotiate message structure
 204  * @buf: Raw buffer channel data
 205  *
 206  * @icmsghdrp is of type &struct icmsg_hdr.
 207  * Set up and fill in default negotiate response message.
 208  *
 209  * The fw_version and fw_vercnt specifies the framework version that
 210  * we can support.
 211  *
 212  * The srv_version and srv_vercnt specifies the service
 213  * versions we can support.
 214  *
 215  * Versions are given in decreasing order.
 216  *
 217  * nego_fw_version and nego_srv_version store the selected protocol versions.
 218  *
 219  * Mainly used by Hyper-V drivers.
 220  */
 221 bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp,
 222                                 u8 *buf, const int *fw_version, int fw_vercnt,
 223                                 const int *srv_version, int srv_vercnt,
 224                                 int *nego_fw_version, int *nego_srv_version)
 225 {
 226         int icframe_major, icframe_minor;
 227         int icmsg_major, icmsg_minor;
 228         int fw_major, fw_minor;
 229         int srv_major, srv_minor;
 230         int i, j;
 231         bool found_match = false;
 232         struct icmsg_negotiate *negop;
 233
 234         icmsghdrp->icmsgsize = 0x10;
 235         negop = (struct icmsg_negotiate *)&buf[
 236                 sizeof(struct vmbuspipe_hdr) +
 237                 sizeof(struct icmsg_hdr)];
 238
 239         icframe_major = negop->icframe_vercnt;
 240         icframe_minor = 0;
 241
 242         icmsg_major = negop->icmsg_vercnt;
 243         icmsg_minor = 0;
 244
 245         /*
 246          * Select the framework version number we will
 247          * support.
 248          */
 249
 250         for (i = 0; i < fw_vercnt; i++) {
 251                 fw_major = (fw_version[i] >> 16);
 252                 fw_minor = (fw_version[i] & 0xFFFF);
 253
 254                 for (j = 0; j < negop->icframe_vercnt; j++) {
 255                         if ((negop->icversion_data[j].major == fw_major) &&
 256                             (negop->icversion_data[j].minor == fw_minor)) {
 257                                 icframe_major = negop->icversion_data[j].major;
 258                                 icframe_minor = negop->icversion_data[j].minor;
 259                                 found_match = true;
 260                                 break;
 261                         }
 262                 }
 263
 264                 if (found_match)
 265                         break;
 266         }
 267
 268         if (!found_match)
 269                 goto fw_error;
 270
 271         found_match = false;
 272
 273         for (i = 0; i < srv_vercnt; i++) {
 274                 srv_major = (srv_version[i] >> 16);
 275                 srv_minor = (srv_version[i] & 0xFFFF);
 276
 277                 for (j = negop->icframe_vercnt;
 278                         (j < negop->icframe_vercnt + negop->icmsg_vercnt);
 279                         j++) {
 280
 281                         if ((negop->icversion_data[j].major == srv_major) &&
 282                                 (negop->icversion_data[j].minor == srv_minor)) {
 283
 284                                 icmsg_major = negop->icversion_data[j].major;
 285                                 icmsg_minor = negop->icversion_data[j].minor;
 286                                 found_match = true;
 287                                 break;
 288                         }
 289                 }
 290
 291                 if (found_match)
 292                         break;
 293         }
 294
 295         /*
 296          * Respond with the framework and service
 297          * version numbers we can support.
 298          */
 299
 300 fw_error:
 301         if (!found_match) {
 302                 negop->icframe_vercnt = 0;
 303                 negop->icmsg_vercnt = 0;
 304         } else {
 305                 negop->icframe_vercnt = 1;
 306                 negop->icmsg_vercnt = 1;
 307         }
 308
 309         if (nego_fw_version)
 310                 *nego_fw_version = (icframe_major << 16) | icframe_minor;
 311
 312         if (nego_srv_version)
 313                 *nego_srv_version = (icmsg_major << 16) | icmsg_minor;
 314
 315         negop->icversion_data[0].major = icframe_major;
 316         negop->icversion_data[0].minor = icframe_minor;
 317         negop->icversion_data[1].major = icmsg_major;
 318         negop->icversion_data[1].minor = icmsg_minor;
 319         return found_match;
 320 }
 321
 322 EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp);
 323
 324 /*
 325  * alloc_channel - Allocate and initialize a vmbus channel object
 326  */
 327 static struct vmbus_channel *alloc_channel(void)
 328 {
 329         struct vmbus_channel *channel;
 330
 331         channel = kzalloc(sizeof(*channel), GFP_ATOMIC);
 332         if (!channel)
 333                 return NULL;
 334
 335         channel->acquire_ring_lock = true;
 336         spin_lock_init(&channel->inbound_lock);
 337         spin_lock_init(&channel->lock);
 338
 339         INIT_LIST_HEAD(&channel->sc_list);
 340         INIT_LIST_HEAD(&channel->percpu_list);
 341
 342         return channel;
 343 }
 344
 345 /*
 346  * free_channel - Release the resources used by the vmbus channel object
 347  */
 348 static void free_channel(struct vmbus_channel *channel)
 349 {
 350         kfree(channel);
 351 }
 352
 353 static void percpu_channel_enq(void *arg)
 354 {
 355         struct vmbus_channel *channel = arg;
 356         int cpu = smp_processor_id();
 357
 358         list_add_tail(&channel->percpu_list, &hv_context.percpu_list[cpu]);
 359 }
 360
 361 static void percpu_channel_deq(void *arg)
 362 {
 363         struct vmbus_channel *channel = arg;
 364
 365         list_del(&channel->percpu_list);
 366 }
 367
 368
 369 static void vmbus_release_relid(u32 relid)
 370 {
 371         struct vmbus_channel_relid_released msg;
 372
 373         memset(&msg, 0, sizeof(struct vmbus_channel_relid_released));
 374         msg.child_relid = relid;
 375         msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
 376         vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released),
 377                        true);
 378 }
 379
 380 void hv_event_tasklet_disable(struct vmbus_channel *channel)
 381 {
 382         struct tasklet_struct *tasklet;
 383         tasklet = hv_context.event_dpc[channel->target_cpu];
 384         tasklet_disable(tasklet);
 385 }
 386
 387 void hv_event_tasklet_enable(struct vmbus_channel *channel)
 388 {
 389         struct tasklet_struct *tasklet;
 390         tasklet = hv_context.event_dpc[channel->target_cpu];
 391         tasklet_enable(tasklet);
 392
 393         /* In case there is any pending event */
 394         tasklet_schedule(tasklet);
 395 }
 396
 397 void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
 398 {
 399         unsigned long flags;
 400         struct vmbus_channel *primary_channel;
 401
 402         BUG_ON(!channel->rescind);
 403         BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));
 404
 405         hv_event_tasklet_disable(channel);
 406         if (channel->target_cpu != get_cpu()) {
 407                 put_cpu();
 408                 smp_call_function_single(channel->target_cpu,
 409                                          percpu_channel_deq, channel, true);
 410         } else {
 411                 percpu_channel_deq(channel);
 412                 put_cpu();
 413         }
 414         hv_event_tasklet_enable(channel);
 415
 416         if (channel->primary_channel == NULL) {
 417                 list_del(&channel->listentry);
 418
 419                 primary_channel = channel;
 420         } else {
 421                 primary_channel = channel->primary_channel;
 422                 spin_lock_irqsave(&primary_channel->lock, flags);
 423                 list_del(&channel->sc_list);
 424                 primary_channel->num_sc--;
 425                 spin_unlock_irqrestore(&primary_channel->lock, flags);
 426         }
 427
 428         /*
 429          * We need to free the bit for init_vp_index() to work in the case
 430          * of sub-channel, when we reload drivers like hv_netvsc.
 431          */
 432         if (channel->affinity_policy == HV_LOCALIZED)
 433                 cpumask_clear_cpu(channel->target_cpu,
 434                                   &primary_channel->alloced_cpus_in_node);
 435
 436         vmbus_release_relid(relid);
 437
 438         free_channel(channel);
 439 }
 440
 441 void vmbus_free_channels(void)
 442 {
 443         struct vmbus_channel *channel, *tmp;
 444
 445         mutex_lock(&vmbus_connection.channel_mutex);
 446         list_for_each_entry_safe(channel, tmp, &vmbus_connection.chn_list,
 447                 listentry) {
 448                 /* hv_process_channel_removal() needs this */
 449                 channel->rescind = true;
 450
 451                 vmbus_device_unregister(channel->device_obj);
 452         }
 453         mutex_unlock(&vmbus_connection.channel_mutex);
 454 }
 455
 456 /*
 457  * vmbus_process_offer - Process the offer by creating a channel/device
 458  * associated with this offer
 459  */
 460 static void vmbus_process_offer(struct vmbus_channel *newchannel)
 461 {
 462         struct vmbus_channel *channel;
 463         bool fnew = true;
 464         unsigned long flags;
 465         u16 dev_type;
 466         int ret;
 467
 468         /* Make sure this is a new offer */
 469         mutex_lock(&vmbus_connection.channel_mutex);
 470
 471         list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
 472                 if (!uuid_le_cmp(channel->offermsg.offer.if_type,
 473                         newchannel->offermsg.offer.if_type) &&
 474                         !uuid_le_cmp(channel->offermsg.offer.if_instance,
 475                                 newchannel->offermsg.offer.if_instance)) {
 476                         fnew = false;
 477                         break;
 478                 }
 479         }
 480
 481         if (fnew)
 482                 list_add_tail(&newchannel->listentry,
 483                               &vmbus_connection.chn_list);
 484
 485         mutex_unlock(&vmbus_connection.channel_mutex);
 486
 487         if (!fnew) {
 488                 /*
 489                  * Check to see if this is a sub-channel.
 490                  */
 491                 if (newchannel->offermsg.offer.sub_channel_index != 0) {
 492                         /*
 493                          * Process the sub-channel.
 494                          */
 495                         newchannel->primary_channel = channel;
 496                         spin_lock_irqsave(&channel->lock, flags);
 497                         list_add_tail(&newchannel->sc_list, &channel->sc_list);
 498                         channel->num_sc++;
 499                         spin_unlock_irqrestore(&channel->lock, flags);
 500                 } else
 501                         goto err_free_chan;
 502         }
 503
 504         dev_type = hv_get_dev_type(newchannel);
 505
 506         init_vp_index(newchannel, dev_type);
 507
 508         hv_event_tasklet_disable(newchannel);
 509         if (newchannel->target_cpu != get_cpu()) {
 510                 put_cpu();
 511                 smp_call_function_single(newchannel->target_cpu,
 512                                          percpu_channel_enq,
 513                                          newchannel, true);
 514         } else {
 515                 percpu_channel_enq(newchannel);
 516                 put_cpu();
 517         }
 518         hv_event_tasklet_enable(newchannel);
 519
 520         /*
 521          * This state is used to indicate a successful open
 522          * so that when we do close the channel normally, we
 523          * can cleanup properly
 524          */
 525         newchannel->state = CHANNEL_OPEN_STATE;
 526
 527         if (!fnew) {
 528                 if (channel->sc_creation_callback != NULL)
 529                         channel->sc_creation_callback(newchannel);
 530                 return;
 531         }
 532
 533         /*
 534          * Start the process of binding this offer to the driver
 535          * We need to set the DeviceObject field before calling
 536          * vmbus_child_dev_add()
 537          */
 538         newchannel->device_obj = vmbus_device_create(
 539                 &newchannel->offermsg.offer.if_type,
 540                 &newchannel->offermsg.offer.if_instance,
 541                 newchannel);
 542         if (!newchannel->device_obj)
 543                 goto err_deq_chan;
 544
 545         newchannel->device_obj->device_id = dev_type;
 546         /*
 547          * Add the new device to the bus. This will kick off device-driver
 548          * binding which eventually invokes the device driver's AddDevice()
 549          * method.
 550          */
 551         mutex_lock(&vmbus_connection.channel_mutex);
 552         ret = vmbus_device_register(newchannel->device_obj);
 553         mutex_unlock(&vmbus_connection.channel_mutex);
 554
 555         if (ret != 0) {
 556                 pr_err("unable to add child device object (relid %d)\n",
 557                         newchannel->offermsg.child_relid);
 558                 kfree(newchannel->device_obj);
 559                 goto err_deq_chan;
 560         }
 561         return;
 562
 563 err_deq_chan:
 564         mutex_lock(&vmbus_connection.channel_mutex);
 565         list_del(&newchannel->listentry);
 566         mutex_unlock(&vmbus_connection.channel_mutex);
 567
 568         hv_event_tasklet_disable(newchannel);
 569         if (newchannel->target_cpu != get_cpu()) {
 570                 put_cpu();
 571                 smp_call_function_single(newchannel->target_cpu,
 572                                          percpu_channel_deq, newchannel, true);
 573         } else {
 574                 percpu_channel_deq(newchannel);
 575                 put_cpu();
 576         }
 577         hv_event_tasklet_enable(newchannel);
 578
 579         vmbus_release_relid(newchannel->offermsg.child_relid);
 580
 581 err_free_chan:
 582         free_channel(newchannel);
 583 }
 584
 585 /*
 586  * We use this state to statically distribute the channel interrupt load.
 587  */
 588 static int next_numa_node_id;
 589
 590 /*
 591  * Starting with Win8, we can statically distribute the incoming
 592  * channel interrupt load by binding a channel to VCPU.
 593  * We do this in a hierarchical fashion:
 594  * First distribute the primary channels across available NUMA nodes
 595  * and then distribute the subchannels amongst the CPUs in the NUMA
 596  * node assigned to the primary channel.
 597  *
 598  * For pre-win8 hosts or non-performance critical channels we assign the
 599  * first CPU in the first NUMA node.
 600  */
 601 static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
 602 {
 603         u32 cur_cpu;
 604         bool perf_chn = vmbus_devs[dev_type].perf_device;
 605         struct vmbus_channel *primary = channel->primary_channel;
 606         int next_node;
 607         struct cpumask available_mask;
 608         struct cpumask *alloced_mask;
 609
 610         if ((vmbus_proto_version == VERSION_WS2008) ||
 611             (vmbus_proto_version == VERSION_WIN7) || (!perf_chn)) {
 612                 /*
 613                  * Prior to win8, all channel interrupts are
 614                  * delivered on cpu 0.
 615                  * Also if the channel is not a performance critical
 616                  * channel, bind it to cpu 0.
 617                  */
 618                 channel->numa_node = 0;
 619                 channel->target_cpu = 0;
 620                 channel->target_vp = hv_context.vp_index[0];
 621                 return;
 622         }
 623
 624         /*
 625          * Based on the channel affinity policy, we will assign the NUMA
 626          * nodes.
 627          */
 628
 629         if ((channel->affinity_policy == HV_BALANCED) || (!primary)) {
 630                 while (true) {
 631                         next_node = next_numa_node_id++;
 632                         if (next_node == nr_node_ids) {
 633                                 next_node = next_numa_node_id = 0;
 634                                 continue;
 635                         }
 636                         if (cpumask_empty(cpumask_of_node(next_node)))
 637                                 continue;
 638                         break;
 639                 }
 640                 channel->numa_node = next_node;
 641                 primary = channel;
 642         }
 643         alloced_mask = &hv_context.hv_numa_map[primary->numa_node];
 644
 645         if (cpumask_weight(alloced_mask) ==
 646             cpumask_weight(cpumask_of_node(primary->numa_node))) {
 647                 /*
 648                  * We have cycled through all the CPUs in the node;
 649                  * reset the alloced map.
 650                  */
 651                 cpumask_clear(alloced_mask);
 652         }
 653
 654         cpumask_xor(&available_mask, alloced_mask,
 655                     cpumask_of_node(primary->numa_node));
 656
 657         cur_cpu = -1;
 658
 659         if (primary->affinity_policy == HV_LOCALIZED) {
 660                 /*
 661                  * Normally Hyper-V host doesn't create more subchannels
 662                  * than there are VCPUs on the node but it is possible when not
 663                  * all present VCPUs on the node are initialized by guest.
 664                  * Clear the alloced_cpus_in_node to start over.
 665                  */
 666                 if (cpumask_equal(&primary->alloced_cpus_in_node,
 667                                   cpumask_of_node(primary->numa_node)))
 668                         cpumask_clear(&primary->alloced_cpus_in_node);
 669         }
 670
 671         while (true) {
 672                 cur_cpu = cpumask_next(cur_cpu, &available_mask);
 673                 if (cur_cpu >= nr_cpu_ids) {
 674                         cur_cpu = -1;
 675                         cpumask_copy(&available_mask,
 676                                      cpumask_of_node(primary->numa_node));
 677                         continue;
 678                 }
 679
 680                 if (primary->affinity_policy == HV_LOCALIZED) {
 681                         /*
 682                          * NOTE: in the case of sub-channel, we clear the
 683                          * sub-channel related bit(s) in
 684                          * primary->alloced_cpus_in_node in
 685                          * hv_process_channel_removal(), so when we
 686                          * reload drivers like hv_netvsc in SMP guest, here
 687                          * we're able to re-allocate
 688                          * bit from primary->alloced_cpus_in_node.
 689                          */
 690                         if (!cpumask_test_cpu(cur_cpu,
 691                                               &primary->alloced_cpus_in_node)) {
 692                                 cpumask_set_cpu(cur_cpu,
 693                                                 &primary->alloced_cpus_in_node);
 694                                 cpumask_set_cpu(cur_cpu, alloced_mask);
 695                                 break;
 696                         }
 697                 } else {
 698                         cpumask_set_cpu(cur_cpu, alloced_mask);
 699                         break;
 700                 }
 701         }
 702
 703         channel->target_cpu = cur_cpu;
 704         channel->target_vp = hv_context.vp_index[cur_cpu];
 705 }
 706
 707 static void vmbus_wait_for_unload(void)
 708 {
 709         int cpu;
 710         void *page_addr;
 711         struct hv_message *msg;
 712         struct vmbus_channel_message_header *hdr;
 713         u32 message_type;
 714
 715         /*
 716          * CHANNELMSG_UNLOAD_RESPONSE is always delivered to the CPU which was
 717          * used for initial contact or to CPU0 depending on host version. When
 718          * we're crashing on a different CPU let's hope that IRQ handler on
 719          * the cpu which receives CHANNELMSG_UNLOAD_RESPONSE is still
 720          * functional and vmbus_unload_response() will complete
 721          * vmbus_connection.unload_event. If not, the last thing we can do is
 722          * read message pages for all CPUs directly.
 723          */
 724         while (1) {
 725                 if (completion_done(&vmbus_connection.unload_event))
 726                         break;
 727
 728                 for_each_online_cpu(cpu) {
 729                         page_addr = hv_context.synic_message_page[cpu];
 730                         msg = (struct hv_message *)page_addr +
 731                                 VMBUS_MESSAGE_SINT;
 732
 733                         message_type = READ_ONCE(msg->header.message_type);
 734                         if (message_type == HVMSG_NONE)
 735                                 continue;
 736
 737                         hdr = (struct vmbus_channel_message_header *)
 738                                 msg->u.payload;
 739
 740                         if (hdr->msgtype == CHANNELMSG_UNLOAD_RESPONSE)
 741                                 complete(&vmbus_connection.unload_event);
 742
 743                         vmbus_signal_eom(msg, message_type);
 744                 }
 745
 746                 mdelay(10);
 747         }
 748
 749         /*
 750          * We're crashing and already got the UNLOAD_RESPONSE, cleanup all
 751          * maybe-pending messages on all CPUs to be able to receive new
 752          * messages after we reconnect.
 753          */
 754         for_each_online_cpu(cpu) {
 755                 page_addr = hv_context.synic_message_page[cpu];
 756                 msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
 757                 msg->header.message_type = HVMSG_NONE;
 758         }
 759 }
 760
 761 /*
 762  * vmbus_unload_response - Handler for the unload response.
 763  */
 764 static void vmbus_unload_response(struct vmbus_channel_message_header *hdr)
 765 {
 766         /*
 767          * This is a global event; just wakeup the waiting thread.
 768          * Once we successfully unload, we can cleanup the monitor state.
 769          */
 770         complete(&vmbus_connection.unload_event);
 771 }
 772
 773 void vmbus_initiate_unload(bool crash)
 774 {
 775         struct vmbus_channel_message_header hdr;
 776
 777         /* Pre-Win2012R2 hosts don't support reconnect */
 778         if (vmbus_proto_version < VERSION_WIN8_1)
 779                 return;
 780
 781         init_completion(&vmbus_connection.unload_event);
 782         memset(&hdr, 0, sizeof(struct vmbus_channel_message_header));
 783         hdr.msgtype = CHANNELMSG_UNLOAD;
 784         vmbus_post_msg(&hdr, sizeof(struct vmbus_channel_message_header),
 785                        !crash);
 786
 787         /*
 788          * vmbus_initiate_unload() is also called on crash and the crash can be
 789          * happening in an interrupt context, where scheduling is impossible.
 790          */
 791         if (!crash)
 792                 wait_for_completion(&vmbus_connection.unload_event);
 793         else
 794                 vmbus_wait_for_unload();
 795 }
 796
 797 /*
 798  * vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
 799  *
 800  */
 801 static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
 802 {
 803         struct vmbus_channel_offer_channel *offer;
 804         struct vmbus_channel *newchannel;
 805
 806         offer = (struct vmbus_channel_offer_channel *)hdr;
 807
 808         /* Allocate the channel object and save this offer. */
 809         newchannel = alloc_channel();
 810         if (!newchannel) {
 811                 vmbus_release_relid(offer->child_relid);
 812                 pr_err("Unable to allocate channel object\n");
 813                 return;
 814         }
 815
 816         /*
 817          * By default we setup state to enable batched
 818          * reading. A specific service can choose to
 819          * disable this prior to opening the channel.
 820          */
 821         newchannel->batched_reading = true;
 822
 823         /*
 824          * Setup state for signalling the host.
 825          */
 826         newchannel->sig_event = (struct hv_input_signal_event *)
 827                                 (ALIGN((unsigned long)
 828                                 &newchannel->sig_buf,
 829                                 HV_HYPERCALL_PARAM_ALIGN));
 830
 831         newchannel->sig_event->connectionid.asu32 = 0;
 832         newchannel->sig_event->connectionid.u.id = VMBUS_EVENT_CONNECTION_ID;
 833         newchannel->sig_event->flag_number = 0;
 834         newchannel->sig_event->rsvdz = 0;
 835
 836         if (vmbus_proto_version != VERSION_WS2008) {
 837                 newchannel->is_dedicated_interrupt =
 838                                 (offer->is_dedicated_interrupt != 0);
 839                 newchannel->sig_event->connectionid.u.id =
 840                                 offer->connection_id;
 841         }
 842
 843         memcpy(&newchannel->offermsg, offer,
 844                sizeof(struct vmbus_channel_offer_channel));
 845         newchannel->monitor_grp = (u8)offer->monitorid / 32;
 846         newchannel->monitor_bit = (u8)offer->monitorid % 32;
 847
 848         vmbus_process_offer(newchannel);
 849 }
 850
 851 /*
 852  * vmbus_onoffer_rescind - Rescind offer handler.
 853  *
 854  * We queue a work item to process this offer synchronously
 855  */
 856 static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
 857 {
 858         struct vmbus_channel_rescind_offer *rescind;
 859         struct vmbus_channel *channel;
 860         unsigned long flags;
 861         struct device *dev;
 862
 863         rescind = (struct vmbus_channel_rescind_offer *)hdr;
 864
 865         mutex_lock(&vmbus_connection.channel_mutex);
 866         channel = relid2channel(rescind->child_relid);
 867
 868         if (channel == NULL) {
 869                 /*
 870                  * This is very impossible, because in
 871                  * vmbus_process_offer(), we have already invoked
 872                  * vmbus_release_relid() on error.
 873                  */
 874                 goto out;
 875         }
 876
 877         spin_lock_irqsave(&channel->lock, flags);
 878         channel->rescind = true;
 879         spin_unlock_irqrestore(&channel->lock, flags);
 880
 881         vmbus_rescind_cleanup(channel);
 882
 883         if (channel->device_obj) {
 884                 if (channel->chn_rescind_callback) {
 885                         channel->chn_rescind_callback(channel);
 886                         goto out;
 887                 }
 888                 /*
 889                  * We will have to unregister this device from the
 890                  * driver core.
 891                  */
 892                 dev = get_device(&channel->device_obj->device);
 893                 if (dev) {
 894                         vmbus_device_unregister(channel->device_obj);
 895                         put_device(dev);
 896                 }
 897         } else {
 898                 hv_process_channel_removal(channel,
 899                         channel->offermsg.child_relid);
 900         }
 901
 902 out:
 903         mutex_unlock(&vmbus_connection.channel_mutex);
 904 }
 905
 906 void vmbus_hvsock_device_unregister(struct vmbus_channel *channel)
 907 {
 908         mutex_lock(&vmbus_connection.channel_mutex);
 909
 910         BUG_ON(!is_hvsock_channel(channel));
 911
 912         channel->rescind = true;
 913         vmbus_device_unregister(channel->device_obj);
 914
 915         mutex_unlock(&vmbus_connection.channel_mutex);
 916 }
 917 EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister);
 918
 919
 920 /*
 921  * vmbus_onoffers_delivered -
 922  * This is invoked when all offers have been delivered.
 923  *
 924  * Nothing to do here.
 925  */
 926 static void vmbus_onoffers_delivered(
 927                         struct vmbus_channel_message_header *hdr)
 928 {
 929 }
 930
 931 /*
 932  * vmbus_onopen_result - Open result handler.
 933  *
 934  * This is invoked when we received a response to our channel open request.
 935  * Find the matching request, copy the response and signal the requesting
 936  * thread.
 937  */
 938 static void vmbus_onopen_result(struct vmbus_channel_message_header *hdr)
 939 {
 940         struct vmbus_channel_open_result *result;
 941         struct vmbus_channel_msginfo *msginfo;
 942         struct vmbus_channel_message_header *requestheader;
 943         struct vmbus_channel_open_channel *openmsg;
 944         unsigned long flags;
 945
 946         result = (struct vmbus_channel_open_result *)hdr;
 947
 948         /*
 949          * Find the open msg, copy the result and signal/unblock the wait event
 950          */
 951         spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
 952
 953         list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
 954                                 msglistentry) {
 955                 requestheader =
 956                         (struct vmbus_channel_message_header *)msginfo->msg;
 957
 958                 if (requestheader->msgtype == CHANNELMSG_OPENCHANNEL) {
 959                         openmsg =
 960                         (struct vmbus_channel_open_channel *)msginfo->msg;
 961                         if (openmsg->child_relid == result->child_relid &&
 962                             openmsg->openid == result->openid) {
 963                                 memcpy(&msginfo->response.open_result,
 964                                        result,
 965                                        sizeof(
 966                                         struct vmbus_channel_open_result));
 967                                 complete(&msginfo->waitevent);
 968                                 break;
 969                         }
 970                 }
 971         }
 972         spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
 973 }
 974
 975 /*
 976  * vmbus_ongpadl_created - GPADL created handler.
 977  *
 978  * This is invoked when we received a response to our gpadl create request.
 979  * Find the matching request, copy the response and signal the requesting
 980  * thread.
 981  */
 982 static void vmbus_ongpadl_created(struct vmbus_channel_message_header *hdr)
 983 {
 984         struct vmbus_channel_gpadl_created *gpadlcreated;
 985         struct vmbus_channel_msginfo *msginfo;
 986         struct vmbus_channel_message_header *requestheader;
 987         struct vmbus_channel_gpadl_header *gpadlheader;
 988         unsigned long flags;
 989
 990         gpadlcreated = (struct vmbus_channel_gpadl_created *)hdr;
 991
 992         /*
 993          * Find the establish msg, copy the result and signal/unblock the wait
 994          * event
 995          */
 996         spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
 997
 998         list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
 999                                 msglistentry) {
1000                 requestheader =
1001                         (struct vmbus_channel_message_header *)msginfo->msg;
1002
1003                 if (requestheader->msgtype == CHANNELMSG_GPADL_HEADER) {
1004                         gpadlheader =
1005                         (struct vmbus_channel_gpadl_header *)requestheader;
1006
1007                         if ((gpadlcreated->child_relid ==
1008                              gpadlheader->child_relid) &&
1009                             (gpadlcreated->gpadl == gpadlheader->gpadl)) {
1010                                 memcpy(&msginfo->response.gpadl_created,
1011                                        gpadlcreated,
1012                                        sizeof(
1013                                         struct vmbus_channel_gpadl_created));
1014                                 complete(&msginfo->waitevent);
1015                                 break;
1016                         }
1017                 }
1018         }
1019         spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
1020 }
1021
1022 /*
1023  * vmbus_ongpadl_torndown - GPADL torndown handler.
1024  *
1025  * This is invoked when we received a response to our gpadl teardown request.
1026  * Find the matching request, copy the response and signal the requesting
1027  * thread.
1028  */
1029 static void vmbus_ongpadl_torndown(
1030                         struct vmbus_channel_message_header *hdr)
1031 {
1032         struct vmbus_channel_gpadl_torndown *gpadl_torndown;
1033         struct vmbus_channel_msginfo *msginfo;
1034         struct vmbus_channel_message_header *requestheader;
1035         struct vmbus_channel_gpadl_teardown *gpadl_teardown;
1036         unsigned long flags;
1037
1038         gpadl_torndown = (struct vmbus_channel_gpadl_torndown *)hdr;
1039
1040         /*
1041          * Find the open msg, copy the result and signal/unblock the wait event
1042          */
1043         spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
1044
1045         list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
1046                                 msglistentry) {
1047                 requestheader =
1048                         (struct vmbus_channel_message_header *)msginfo->msg;
1049
1050                 if (requestheader->msgtype == CHANNELMSG_GPADL_TEARDOWN) {
1051                         gpadl_teardown =
1052                         (struct vmbus_channel_gpadl_teardown *)requestheader;
1053
1054                         if (gpadl_torndown->gpadl == gpadl_teardown->gpadl) {
1055                                 memcpy(&msginfo->response.gpadl_torndown,
1056                                        gpadl_torndown,
1057                                        sizeof(
1058                                         struct vmbus_channel_gpadl_torndown));
1059                                 complete(&msginfo->waitevent);
1060                                 break;
1061                         }
1062                 }
1063         }
1064         spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
1065 }
1066
1067 /*
1068  * vmbus_onversion_response - Version response handler
1069  *
1070  * This is invoked when we received a response to our initiate contact request.
1071  * Find the matching request, copy the response and signal the requesting
1072  * thread.
1073  */
1074 static void vmbus_onversion_response(
1075                 struct vmbus_channel_message_header *hdr)
1076 {
1077         struct vmbus_channel_msginfo *msginfo;
1078         struct vmbus_channel_message_header *requestheader;
1079         struct vmbus_channel_version_response *version_response;
1080         unsigned long flags;
1081
1082         version_response = (struct vmbus_channel_version_response *)hdr;
1083         spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
1084
1085         list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
1086                                 msglistentry) {
1087                 requestheader =
1088                         (struct vmbus_channel_message_header *)msginfo->msg;
1089
1090                 if (requestheader->msgtype ==
1091                     CHANNELMSG_INITIATE_CONTACT) {
1092                         memcpy(&msginfo->response.version_response,
1093                               version_response,
1094                               sizeof(struct vmbus_channel_version_response));
1095                         complete(&msginfo->waitevent);
1096                 }
1097         }
1098         spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
1099 }
1100
1101 /* Channel message dispatch table */
1102 struct vmbus_channel_message_table_entry
1103         channel_message_table[CHANNELMSG_COUNT] = {
1104         {CHANNELMSG_INVALID,                    0, NULL},
1105         {CHANNELMSG_OFFERCHANNEL,               0, vmbus_onoffer},
1106         {CHANNELMSG_RESCIND_CHANNELOFFER,       0, vmbus_onoffer_rescind},
1107         {CHANNELMSG_REQUESTOFFERS,              0, NULL},
1108         {CHANNELMSG_ALLOFFERS_DELIVERED,        1, vmbus_onoffers_delivered},
1109         {CHANNELMSG_OPENCHANNEL,                0, NULL},
1110         {CHANNELMSG_OPENCHANNEL_RESULT,         1, vmbus_onopen_result},
1111         {CHANNELMSG_CLOSECHANNEL,               0, NULL},
1112         {CHANNELMSG_GPADL_HEADER,               0, NULL},
1113         {CHANNELMSG_GPADL_BODY,                 0, NULL},
1114         {CHANNELMSG_GPADL_CREATED,              1, vmbus_ongpadl_created},
1115         {CHANNELMSG_GPADL_TEARDOWN,             0, NULL},
1116         {CHANNELMSG_GPADL_TORNDOWN,             1, vmbus_ongpadl_torndown},
1117         {CHANNELMSG_RELID_RELEASED,             0, NULL},
1118         {CHANNELMSG_INITIATE_CONTACT,           0, NULL},
1119         {CHANNELMSG_VERSION_RESPONSE,           1, vmbus_onversion_response},
1120         {CHANNELMSG_UNLOAD,                     0, NULL},
1121         {CHANNELMSG_UNLOAD_RESPONSE,            1, vmbus_unload_response},
1122         {CHANNELMSG_18,                         0, NULL},
1123         {CHANNELMSG_19,                         0, NULL},
1124         {CHANNELMSG_20,                         0, NULL},
1125         {CHANNELMSG_TL_CONNECT_REQUEST,         0, NULL},
1126 };
1127
1128 /*
1129  * vmbus_onmessage - Handler for channel protocol messages.
1130  *
1131  * This is invoked in the vmbus worker thread context.
1132  */
1133 void vmbus_onmessage(void *context)
1134 {
1135         struct hv_message *msg = context;
1136         struct vmbus_channel_message_header *hdr;
1137         int size;
1138
1139         hdr = (struct vmbus_channel_message_header *)msg->u.payload;
1140         size = msg->header.payload_size;
1141
1142         if (hdr->msgtype >= CHANNELMSG_COUNT) {
1143                 pr_err("Received invalid channel message type %d size %d\n",
1144                            hdr->msgtype, size);
1145                 print_hex_dump_bytes("", DUMP_PREFIX_NONE,
1146                                      (unsigned char *)msg->u.payload, size);
1147                 return;
1148         }
1149
1150         if (channel_message_table[hdr->msgtype].message_handler)
1151                 channel_message_table[hdr->msgtype].message_handler(hdr);
1152         else
1153                 pr_err("Unhandled channel message type %d\n", hdr->msgtype);
1154 }
1155
1156 /*
1157  * vmbus_request_offers - Send a request to get all our pending offers.
1158  */
1159 int vmbus_request_offers(void)
1160 {
1161         struct vmbus_channel_message_header *msg;
1162         struct vmbus_channel_msginfo *msginfo;
1163         int ret;
1164
1165         msginfo = kmalloc(sizeof(*msginfo) +
1166                           sizeof(struct vmbus_channel_message_header),
1167                           GFP_KERNEL);
1168         if (!msginfo)
1169                 return -ENOMEM;
1170
1171         msg = (struct vmbus_channel_message_header *)msginfo->msg;
1172
1173         msg->msgtype = CHANNELMSG_REQUESTOFFERS;
1174
1175
1176         ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_message_header),
1177                              true);
1178         if (ret != 0) {
1179                 pr_err("Unable to request offers - %d\n", ret);
1180
1181                 goto cleanup;
1182         }
1183
1184 cleanup:
1185         kfree(msginfo);
1186
1187         return ret;
1188 }
1189
1190 /*
1191  * Retrieve the (sub) channel on which to send an outgoing request.
1192  * When a primary channel has multiple sub-channels, we try to
1193  * distribute the load equally amongst all available channels.
1194  */
1195 struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary)
1196 {
1197         struct list_head *cur, *tmp;
1198         int cur_cpu;
1199         struct vmbus_channel *cur_channel;
1200         struct vmbus_channel *outgoing_channel = primary;
1201         int next_channel;
1202         int i = 1;
1203
1204         if (list_empty(&primary->sc_list))
1205                 return outgoing_channel;
1206
1207         next_channel = primary->next_oc++;
1208
1209         if (next_channel > (primary->num_sc)) {
1210                 primary->next_oc = 0;
1211                 return outgoing_channel;
1212         }
1213
1214         cur_cpu = hv_context.vp_index[get_cpu()];
1215         put_cpu();
1216         list_for_each_safe(cur, tmp, &primary->sc_list) {
1217                 cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
1218                 if (cur_channel->state != CHANNEL_OPENED_STATE)
1219                         continue;
1220
1221                 if (cur_channel->target_vp == cur_cpu)
1222                         return cur_channel;
1223
1224                 if (i == next_channel)
1225                         return cur_channel;
1226
1227                 i++;
1228         }
1229
1230         return outgoing_channel;
1231 }
1232 EXPORT_SYMBOL_GPL(vmbus_get_outgoing_channel);
1233
1234 static void invoke_sc_cb(struct vmbus_channel *primary_channel)
1235 {
1236         struct list_head *cur, *tmp;
1237         struct vmbus_channel *cur_channel;
1238
1239         if (primary_channel->sc_creation_callback == NULL)
1240                 return;
1241
1242         list_for_each_safe(cur, tmp, &primary_channel->sc_list) {
1243                 cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
1244
1245                 primary_channel->sc_creation_callback(cur_channel);
1246         }
1247 }
1248
1249 void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel,
1250                                 void (*sc_cr_cb)(struct vmbus_channel *new_sc))
1251 {
1252         primary_channel->sc_creation_callback = sc_cr_cb;
1253 }
1254 EXPORT_SYMBOL_GPL(vmbus_set_sc_create_callback);
1255
1256 bool vmbus_are_subchannels_present(struct vmbus_channel *primary)
1257 {
1258         bool ret;
1259
1260         ret = !list_empty(&primary->sc_list);
1261
1262         if (ret) {
1263                 /*
1264                  * Invoke the callback on sub-channel creation.
1265                  * This will present a uniform interface to the
1266                  * clients.
1267                  */
1268                 invoke_sc_cb(primary);
1269         }
1270
1271         return ret;
1272 }
1273 EXPORT_SYMBOL_GPL(vmbus_are_subchannels_present);
1274
1275 void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel,
1276                 void (*chn_rescind_cb)(struct vmbus_channel *))
1277 {
1278         channel->chn_rescind_callback = chn_rescind_cb;
1279 }
1280 EXPORT_SYMBOL_GPL(vmbus_set_chn_rescind_callback);