]>
Commit | Line | Data |
---|---|---|
3b20eb23 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
3e7ee490 | 2 | /* |
3e7ee490 HJ |
3 | * Copyright (c) 2009, Microsoft Corporation. |
4 | * | |
3e7ee490 HJ |
5 | * Authors: |
6 | * Haiyang Zhang <haiyangz@microsoft.com> | |
7 | * Hank Janssen <hjanssen@microsoft.com> | |
b0069f43 | 8 | * K. Y. Srinivasan <kys@microsoft.com> |
3e7ee490 | 9 | */ |
0a46618d HJ |
10 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
11 | ||
3e7ee490 HJ |
12 | #include <linux/init.h> |
13 | #include <linux/module.h> | |
14 | #include <linux/device.h> | |
3e7ee490 HJ |
15 | #include <linux/interrupt.h> |
16 | #include <linux/sysctl.h> | |
5a0e3ad6 | 17 | #include <linux/slab.h> |
b0069f43 | 18 | #include <linux/acpi.h> |
8b5d6d3b | 19 | #include <linux/completion.h> |
46a97191 | 20 | #include <linux/hyperv.h> |
b0209501 | 21 | #include <linux/kernel_stat.h> |
4061ed9e | 22 | #include <linux/clockchips.h> |
e513229b | 23 | #include <linux/cpu.h> |
68db0cf1 IM |
24 | #include <linux/sched/task_stack.h> |
25 | ||
1f48dcf1 | 26 | #include <linux/delay.h> |
96c1d058 NM |
27 | #include <linux/notifier.h> |
28 | #include <linux/ptrace.h> | |
35464483 | 29 | #include <linux/screen_info.h> |
510f7aef | 30 | #include <linux/kdebug.h> |
6d146aef | 31 | #include <linux/efi.h> |
4b44f2d1 | 32 | #include <linux/random.h> |
f3a99e76 | 33 | #include <linux/kernel.h> |
63ecc6d2 | 34 | #include <linux/syscore_ops.h> |
fd1fea68 | 35 | #include <clocksource/hyperv_timer.h> |
0f2a6619 | 36 | #include "hyperv_vmbus.h" |
3e7ee490 | 37 | |
/*
 * One dynamically added device id. Entries are linked through @node into a
 * driver's dynids.list (see vmbus_add_dynid()).
 */
struct vmbus_dynid {
	struct list_head node;		/* linkage in drv->dynids.list */
	struct hv_vmbus_device_id id;	/* the id (GUID) that was added */
};
42 | ||
/* NULL means "no VMBus device" as far as vmbus_exists() is concerned */
static struct acpi_device *hv_acpi_dev;

static struct completion probe_event;

static int hyperv_cpuhp_online;

/* Consulted by hyperv_report_reg(): NULL means no panic page is available */
static void *hv_panic_page;

/* Values parsed from ACPI DSDT */
static int vmbus_irq;
int vmbus_interrupt;

/*
 * Boolean to control whether to report panic messages over Hyper-V.
 *
 * It can be set via /proc/sys/kernel/hyperv/record_panic_msg
 */
static int sysctl_record_panic_msg = 1;
61 | ||
62 | static int hyperv_report_reg(void) | |
63 | { | |
64 | return !sysctl_record_panic_msg || !hv_panic_page; | |
65 | } | |
66 | ||
510f7aef VK |
67 | static int hyperv_panic_event(struct notifier_block *nb, unsigned long val, |
68 | void *args) | |
69 | { | |
70 | struct pt_regs *regs; | |
71 | ||
74347a99 | 72 | vmbus_initiate_unload(true); |
510f7aef | 73 | |
73f26e52 TL |
74 | /* |
75 | * Hyper-V should be notified only once about a panic. If we will be | |
76 | * doing hyperv_report_panic_msg() later with kmsg data, don't do | |
77 | * the notification here. | |
78 | */ | |
79 | if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE | |
040026df | 80 | && hyperv_report_reg()) { |
74347a99 | 81 | regs = current_pt_regs(); |
f3a99e76 | 82 | hyperv_report_panic(regs, val, false); |
74347a99 | 83 | } |
96c1d058 NM |
84 | return NOTIFY_DONE; |
85 | } | |
86 | ||
510f7aef VK |
87 | static int hyperv_die_event(struct notifier_block *nb, unsigned long val, |
88 | void *args) | |
89 | { | |
49971e6b | 90 | struct die_args *die = args; |
510f7aef VK |
91 | struct pt_regs *regs = die->regs; |
92 | ||
608a973b MK |
93 | /* Don't notify Hyper-V if the die event is other than oops */ |
94 | if (val != DIE_OOPS) | |
95 | return NOTIFY_DONE; | |
96 | ||
73f26e52 TL |
97 | /* |
98 | * Hyper-V should be notified only once about a panic. If we will be | |
99 | * doing hyperv_report_panic_msg() later with kmsg data, don't do | |
100 | * the notification here. | |
101 | */ | |
040026df | 102 | if (hyperv_report_reg()) |
f3a99e76 | 103 | hyperv_report_panic(regs, val, true); |
510f7aef VK |
104 | return NOTIFY_DONE; |
105 | } | |
106 | ||
/* Notifier block that routes die events to hyperv_die_event() */
static struct notifier_block hyperv_die_block = {
	.notifier_call = hyperv_die_event,
};
/* Notifier block that routes panic events to hyperv_panic_event() */
static struct notifier_block hyperv_panic_block = {
	.notifier_call = hyperv_panic_event,
};
113 | ||
/* Resource name used for the frame buffer MMIO range */
static const char *fb_mmio_name = "fb_range";
static struct resource *fb_mmio;
static struct resource *hyperv_mmio;
/* NOTE(review): presumably serializes access to the MMIO resources above - confirm */
static DEFINE_MUTEX(hyperv_mmio_lock);
98db4335 | 118 | |
cf6a2eac S |
119 | static int vmbus_exists(void) |
120 | { | |
121 | if (hv_acpi_dev == NULL) | |
122 | return -ENODEV; | |
123 | ||
124 | return 0; | |
125 | } | |
126 | ||
c2e5df61 | 127 | static u8 channel_monitor_group(const struct vmbus_channel *channel) |
76c52bbe GKH |
128 | { |
129 | return (u8)channel->offermsg.monitorid / 32; | |
130 | } | |
131 | ||
c2e5df61 | 132 | static u8 channel_monitor_offset(const struct vmbus_channel *channel) |
76c52bbe GKH |
133 | { |
134 | return (u8)channel->offermsg.monitorid % 32; | |
135 | } | |
136 | ||
c2e5df61 SH |
137 | static u32 channel_pending(const struct vmbus_channel *channel, |
138 | const struct hv_monitor_page *monitor_page) | |
76c52bbe GKH |
139 | { |
140 | u8 monitor_group = channel_monitor_group(channel); | |
c2e5df61 | 141 | |
76c52bbe GKH |
142 | return monitor_page->trigger_group[monitor_group].pending; |
143 | } | |
144 | ||
c2e5df61 SH |
145 | static u32 channel_latency(const struct vmbus_channel *channel, |
146 | const struct hv_monitor_page *monitor_page) | |
1cee272b GKH |
147 | { |
148 | u8 monitor_group = channel_monitor_group(channel); | |
149 | u8 monitor_offset = channel_monitor_offset(channel); | |
c2e5df61 | 150 | |
1cee272b GKH |
151 | return monitor_page->latency[monitor_group][monitor_offset]; |
152 | } | |
153 | ||
4947c745 GKH |
154 | static u32 channel_conn_id(struct vmbus_channel *channel, |
155 | struct hv_monitor_page *monitor_page) | |
156 | { | |
157 | u8 monitor_group = channel_monitor_group(channel); | |
158 | u8 monitor_offset = channel_monitor_offset(channel); | |
159 | return monitor_page->parameter[monitor_group][monitor_offset].connectionid.u.id; | |
160 | } | |
161 | ||
03f3a910 GKH |
162 | static ssize_t id_show(struct device *dev, struct device_attribute *dev_attr, |
163 | char *buf) | |
164 | { | |
165 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
166 | ||
167 | if (!hv_dev->channel) | |
168 | return -ENODEV; | |
169 | return sprintf(buf, "%d\n", hv_dev->channel->offermsg.child_relid); | |
170 | } | |
171 | static DEVICE_ATTR_RO(id); | |
172 | ||
a8fb5f3d GKH |
173 | static ssize_t state_show(struct device *dev, struct device_attribute *dev_attr, |
174 | char *buf) | |
175 | { | |
176 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
177 | ||
178 | if (!hv_dev->channel) | |
179 | return -ENODEV; | |
180 | return sprintf(buf, "%d\n", hv_dev->channel->state); | |
181 | } | |
182 | static DEVICE_ATTR_RO(state); | |
183 | ||
5ffd00e2 GKH |
184 | static ssize_t monitor_id_show(struct device *dev, |
185 | struct device_attribute *dev_attr, char *buf) | |
186 | { | |
187 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
188 | ||
189 | if (!hv_dev->channel) | |
190 | return -ENODEV; | |
191 | return sprintf(buf, "%d\n", hv_dev->channel->offermsg.monitorid); | |
192 | } | |
193 | static DEVICE_ATTR_RO(monitor_id); | |
194 | ||
68234c04 GKH |
195 | static ssize_t class_id_show(struct device *dev, |
196 | struct device_attribute *dev_attr, char *buf) | |
197 | { | |
198 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
199 | ||
200 | if (!hv_dev->channel) | |
201 | return -ENODEV; | |
202 | return sprintf(buf, "{%pUl}\n", | |
458c4475 | 203 | &hv_dev->channel->offermsg.offer.if_type); |
68234c04 GKH |
204 | } |
205 | static DEVICE_ATTR_RO(class_id); | |
206 | ||
7c55e1d0 GKH |
207 | static ssize_t device_id_show(struct device *dev, |
208 | struct device_attribute *dev_attr, char *buf) | |
209 | { | |
210 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
211 | ||
212 | if (!hv_dev->channel) | |
213 | return -ENODEV; | |
214 | return sprintf(buf, "{%pUl}\n", | |
458c4475 | 215 | &hv_dev->channel->offermsg.offer.if_instance); |
7c55e1d0 GKH |
216 | } |
217 | static DEVICE_ATTR_RO(device_id); | |
218 | ||
647fa371 GKH |
219 | static ssize_t modalias_show(struct device *dev, |
220 | struct device_attribute *dev_attr, char *buf) | |
221 | { | |
222 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
647fa371 | 223 | |
0027e3fd | 224 | return sprintf(buf, "vmbus:%*phN\n", UUID_SIZE, &hv_dev->dev_type); |
647fa371 GKH |
225 | } |
226 | static DEVICE_ATTR_RO(modalias); | |
227 | ||
#ifdef CONFIG_NUMA
/*
 * sysfs "numa_node" (read-only): prints the node of the channel's target
 * CPU, or fails with -ENODEV when the device has no channel.
 */
static ssize_t numa_node_show(struct device *dev,
			      struct device_attribute *attr, char *buf)
{
	struct vmbus_channel *chan = device_to_hv_device(dev)->channel;

	if (chan == NULL)
		return -ENODEV;

	return sprintf(buf, "%d\n", cpu_to_node(chan->target_cpu));
}
static DEVICE_ATTR_RO(numa_node);
#endif
241 | ||
76c52bbe GKH |
242 | static ssize_t server_monitor_pending_show(struct device *dev, |
243 | struct device_attribute *dev_attr, | |
244 | char *buf) | |
245 | { | |
246 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
247 | ||
248 | if (!hv_dev->channel) | |
249 | return -ENODEV; | |
250 | return sprintf(buf, "%d\n", | |
251 | channel_pending(hv_dev->channel, | |
fd8e3c35 | 252 | vmbus_connection.monitor_pages[0])); |
76c52bbe GKH |
253 | } |
254 | static DEVICE_ATTR_RO(server_monitor_pending); | |
255 | ||
256 | static ssize_t client_monitor_pending_show(struct device *dev, | |
257 | struct device_attribute *dev_attr, | |
258 | char *buf) | |
259 | { | |
260 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
261 | ||
262 | if (!hv_dev->channel) | |
263 | return -ENODEV; | |
264 | return sprintf(buf, "%d\n", | |
265 | channel_pending(hv_dev->channel, | |
266 | vmbus_connection.monitor_pages[1])); | |
267 | } | |
268 | static DEVICE_ATTR_RO(client_monitor_pending); | |
68234c04 | 269 | |
1cee272b GKH |
270 | static ssize_t server_monitor_latency_show(struct device *dev, |
271 | struct device_attribute *dev_attr, | |
272 | char *buf) | |
273 | { | |
274 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
275 | ||
276 | if (!hv_dev->channel) | |
277 | return -ENODEV; | |
278 | return sprintf(buf, "%d\n", | |
279 | channel_latency(hv_dev->channel, | |
280 | vmbus_connection.monitor_pages[0])); | |
281 | } | |
282 | static DEVICE_ATTR_RO(server_monitor_latency); | |
283 | ||
284 | static ssize_t client_monitor_latency_show(struct device *dev, | |
285 | struct device_attribute *dev_attr, | |
286 | char *buf) | |
287 | { | |
288 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
289 | ||
290 | if (!hv_dev->channel) | |
291 | return -ENODEV; | |
292 | return sprintf(buf, "%d\n", | |
293 | channel_latency(hv_dev->channel, | |
294 | vmbus_connection.monitor_pages[1])); | |
295 | } | |
296 | static DEVICE_ATTR_RO(client_monitor_latency); | |
297 | ||
4947c745 GKH |
298 | static ssize_t server_monitor_conn_id_show(struct device *dev, |
299 | struct device_attribute *dev_attr, | |
300 | char *buf) | |
301 | { | |
302 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
303 | ||
304 | if (!hv_dev->channel) | |
305 | return -ENODEV; | |
306 | return sprintf(buf, "%d\n", | |
307 | channel_conn_id(hv_dev->channel, | |
308 | vmbus_connection.monitor_pages[0])); | |
309 | } | |
310 | static DEVICE_ATTR_RO(server_monitor_conn_id); | |
311 | ||
312 | static ssize_t client_monitor_conn_id_show(struct device *dev, | |
313 | struct device_attribute *dev_attr, | |
314 | char *buf) | |
315 | { | |
316 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
317 | ||
318 | if (!hv_dev->channel) | |
319 | return -ENODEV; | |
320 | return sprintf(buf, "%d\n", | |
321 | channel_conn_id(hv_dev->channel, | |
322 | vmbus_connection.monitor_pages[1])); | |
323 | } | |
324 | static DEVICE_ATTR_RO(client_monitor_conn_id); | |
325 | ||
98f4c651 GKH |
326 | static ssize_t out_intr_mask_show(struct device *dev, |
327 | struct device_attribute *dev_attr, char *buf) | |
328 | { | |
329 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
330 | struct hv_ring_buffer_debug_info outbound; | |
ba50bf1c | 331 | int ret; |
98f4c651 GKH |
332 | |
333 | if (!hv_dev->channel) | |
334 | return -ENODEV; | |
ba50bf1c DC |
335 | |
336 | ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, | |
337 | &outbound); | |
338 | if (ret < 0) | |
339 | return ret; | |
340 | ||
98f4c651 GKH |
341 | return sprintf(buf, "%d\n", outbound.current_interrupt_mask); |
342 | } | |
343 | static DEVICE_ATTR_RO(out_intr_mask); | |
344 | ||
345 | static ssize_t out_read_index_show(struct device *dev, | |
346 | struct device_attribute *dev_attr, char *buf) | |
347 | { | |
348 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
349 | struct hv_ring_buffer_debug_info outbound; | |
ba50bf1c | 350 | int ret; |
98f4c651 GKH |
351 | |
352 | if (!hv_dev->channel) | |
353 | return -ENODEV; | |
ba50bf1c DC |
354 | |
355 | ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, | |
356 | &outbound); | |
357 | if (ret < 0) | |
358 | return ret; | |
98f4c651 GKH |
359 | return sprintf(buf, "%d\n", outbound.current_read_index); |
360 | } | |
361 | static DEVICE_ATTR_RO(out_read_index); | |
362 | ||
363 | static ssize_t out_write_index_show(struct device *dev, | |
364 | struct device_attribute *dev_attr, | |
365 | char *buf) | |
366 | { | |
367 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
368 | struct hv_ring_buffer_debug_info outbound; | |
ba50bf1c | 369 | int ret; |
98f4c651 GKH |
370 | |
371 | if (!hv_dev->channel) | |
372 | return -ENODEV; | |
ba50bf1c DC |
373 | |
374 | ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, | |
375 | &outbound); | |
376 | if (ret < 0) | |
377 | return ret; | |
98f4c651 GKH |
378 | return sprintf(buf, "%d\n", outbound.current_write_index); |
379 | } | |
380 | static DEVICE_ATTR_RO(out_write_index); | |
381 | ||
382 | static ssize_t out_read_bytes_avail_show(struct device *dev, | |
383 | struct device_attribute *dev_attr, | |
384 | char *buf) | |
385 | { | |
386 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
387 | struct hv_ring_buffer_debug_info outbound; | |
ba50bf1c | 388 | int ret; |
98f4c651 GKH |
389 | |
390 | if (!hv_dev->channel) | |
391 | return -ENODEV; | |
ba50bf1c DC |
392 | |
393 | ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, | |
394 | &outbound); | |
395 | if (ret < 0) | |
396 | return ret; | |
98f4c651 GKH |
397 | return sprintf(buf, "%d\n", outbound.bytes_avail_toread); |
398 | } | |
399 | static DEVICE_ATTR_RO(out_read_bytes_avail); | |
400 | ||
401 | static ssize_t out_write_bytes_avail_show(struct device *dev, | |
402 | struct device_attribute *dev_attr, | |
403 | char *buf) | |
404 | { | |
405 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
406 | struct hv_ring_buffer_debug_info outbound; | |
ba50bf1c | 407 | int ret; |
98f4c651 GKH |
408 | |
409 | if (!hv_dev->channel) | |
410 | return -ENODEV; | |
ba50bf1c DC |
411 | |
412 | ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, | |
413 | &outbound); | |
414 | if (ret < 0) | |
415 | return ret; | |
98f4c651 GKH |
416 | return sprintf(buf, "%d\n", outbound.bytes_avail_towrite); |
417 | } | |
418 | static DEVICE_ATTR_RO(out_write_bytes_avail); | |
419 | ||
420 | static ssize_t in_intr_mask_show(struct device *dev, | |
421 | struct device_attribute *dev_attr, char *buf) | |
422 | { | |
423 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
424 | struct hv_ring_buffer_debug_info inbound; | |
ba50bf1c | 425 | int ret; |
98f4c651 GKH |
426 | |
427 | if (!hv_dev->channel) | |
428 | return -ENODEV; | |
ba50bf1c DC |
429 | |
430 | ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); | |
431 | if (ret < 0) | |
432 | return ret; | |
433 | ||
98f4c651 GKH |
434 | return sprintf(buf, "%d\n", inbound.current_interrupt_mask); |
435 | } | |
436 | static DEVICE_ATTR_RO(in_intr_mask); | |
437 | ||
438 | static ssize_t in_read_index_show(struct device *dev, | |
439 | struct device_attribute *dev_attr, char *buf) | |
440 | { | |
441 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
442 | struct hv_ring_buffer_debug_info inbound; | |
ba50bf1c | 443 | int ret; |
98f4c651 GKH |
444 | |
445 | if (!hv_dev->channel) | |
446 | return -ENODEV; | |
ba50bf1c DC |
447 | |
448 | ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); | |
449 | if (ret < 0) | |
450 | return ret; | |
451 | ||
98f4c651 GKH |
452 | return sprintf(buf, "%d\n", inbound.current_read_index); |
453 | } | |
454 | static DEVICE_ATTR_RO(in_read_index); | |
455 | ||
456 | static ssize_t in_write_index_show(struct device *dev, | |
457 | struct device_attribute *dev_attr, char *buf) | |
458 | { | |
459 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
460 | struct hv_ring_buffer_debug_info inbound; | |
ba50bf1c | 461 | int ret; |
98f4c651 GKH |
462 | |
463 | if (!hv_dev->channel) | |
464 | return -ENODEV; | |
ba50bf1c DC |
465 | |
466 | ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); | |
467 | if (ret < 0) | |
468 | return ret; | |
469 | ||
98f4c651 GKH |
470 | return sprintf(buf, "%d\n", inbound.current_write_index); |
471 | } | |
472 | static DEVICE_ATTR_RO(in_write_index); | |
473 | ||
474 | static ssize_t in_read_bytes_avail_show(struct device *dev, | |
475 | struct device_attribute *dev_attr, | |
476 | char *buf) | |
477 | { | |
478 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
479 | struct hv_ring_buffer_debug_info inbound; | |
ba50bf1c | 480 | int ret; |
98f4c651 GKH |
481 | |
482 | if (!hv_dev->channel) | |
483 | return -ENODEV; | |
ba50bf1c DC |
484 | |
485 | ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); | |
486 | if (ret < 0) | |
487 | return ret; | |
488 | ||
98f4c651 GKH |
489 | return sprintf(buf, "%d\n", inbound.bytes_avail_toread); |
490 | } | |
491 | static DEVICE_ATTR_RO(in_read_bytes_avail); | |
492 | ||
493 | static ssize_t in_write_bytes_avail_show(struct device *dev, | |
494 | struct device_attribute *dev_attr, | |
495 | char *buf) | |
496 | { | |
497 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
498 | struct hv_ring_buffer_debug_info inbound; | |
ba50bf1c | 499 | int ret; |
98f4c651 GKH |
500 | |
501 | if (!hv_dev->channel) | |
502 | return -ENODEV; | |
ba50bf1c DC |
503 | |
504 | ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); | |
505 | if (ret < 0) | |
506 | return ret; | |
507 | ||
98f4c651 GKH |
508 | return sprintf(buf, "%d\n", inbound.bytes_avail_towrite); |
509 | } | |
510 | static DEVICE_ATTR_RO(in_write_bytes_avail); | |
511 | ||
042ab031 DC |
512 | static ssize_t channel_vp_mapping_show(struct device *dev, |
513 | struct device_attribute *dev_attr, | |
514 | char *buf) | |
515 | { | |
516 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
517 | struct vmbus_channel *channel = hv_dev->channel, *cur_sc; | |
042ab031 DC |
518 | int buf_size = PAGE_SIZE, n_written, tot_written; |
519 | struct list_head *cur; | |
520 | ||
521 | if (!channel) | |
522 | return -ENODEV; | |
523 | ||
3eb0ac86 APM |
524 | mutex_lock(&vmbus_connection.channel_mutex); |
525 | ||
042ab031 DC |
526 | tot_written = snprintf(buf, buf_size, "%u:%u\n", |
527 | channel->offermsg.child_relid, channel->target_cpu); | |
528 | ||
042ab031 DC |
529 | list_for_each(cur, &channel->sc_list) { |
530 | if (tot_written >= buf_size - 1) | |
531 | break; | |
532 | ||
533 | cur_sc = list_entry(cur, struct vmbus_channel, sc_list); | |
534 | n_written = scnprintf(buf + tot_written, | |
535 | buf_size - tot_written, | |
536 | "%u:%u\n", | |
537 | cur_sc->offermsg.child_relid, | |
538 | cur_sc->target_cpu); | |
539 | tot_written += n_written; | |
540 | } | |
541 | ||
3eb0ac86 | 542 | mutex_unlock(&vmbus_connection.channel_mutex); |
042ab031 DC |
543 | |
544 | return tot_written; | |
545 | } | |
546 | static DEVICE_ATTR_RO(channel_vp_mapping); | |
547 | ||
7047f17d S |
548 | static ssize_t vendor_show(struct device *dev, |
549 | struct device_attribute *dev_attr, | |
550 | char *buf) | |
551 | { | |
552 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
553 | return sprintf(buf, "0x%x\n", hv_dev->vendor_id); | |
554 | } | |
555 | static DEVICE_ATTR_RO(vendor); | |
556 | ||
557 | static ssize_t device_show(struct device *dev, | |
558 | struct device_attribute *dev_attr, | |
559 | char *buf) | |
560 | { | |
561 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
562 | return sprintf(buf, "0x%x\n", hv_dev->device_id); | |
563 | } | |
564 | static DEVICE_ATTR_RO(device); | |
565 | ||
d765edbb SH |
566 | static ssize_t driver_override_store(struct device *dev, |
567 | struct device_attribute *attr, | |
568 | const char *buf, size_t count) | |
569 | { | |
570 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
571 | char *driver_override, *old, *cp; | |
572 | ||
573 | /* We need to keep extra room for a newline */ | |
574 | if (count >= (PAGE_SIZE - 1)) | |
575 | return -EINVAL; | |
576 | ||
577 | driver_override = kstrndup(buf, count, GFP_KERNEL); | |
578 | if (!driver_override) | |
579 | return -ENOMEM; | |
580 | ||
581 | cp = strchr(driver_override, '\n'); | |
582 | if (cp) | |
583 | *cp = '\0'; | |
584 | ||
585 | device_lock(dev); | |
586 | old = hv_dev->driver_override; | |
587 | if (strlen(driver_override)) { | |
588 | hv_dev->driver_override = driver_override; | |
589 | } else { | |
590 | kfree(driver_override); | |
591 | hv_dev->driver_override = NULL; | |
592 | } | |
593 | device_unlock(dev); | |
594 | ||
595 | kfree(old); | |
596 | ||
597 | return count; | |
598 | } | |
599 | ||
600 | static ssize_t driver_override_show(struct device *dev, | |
601 | struct device_attribute *attr, char *buf) | |
602 | { | |
603 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
604 | ssize_t len; | |
605 | ||
606 | device_lock(dev); | |
607 | len = snprintf(buf, PAGE_SIZE, "%s\n", hv_dev->driver_override); | |
608 | device_unlock(dev); | |
609 | ||
610 | return len; | |
611 | } | |
612 | static DEVICE_ATTR_RW(driver_override); | |
613 | ||
/*
 * Set up per device attributes in /sys/bus/vmbus/devices/<bus device>
 *
 * NULL-terminated. The monitor_* entries can be hidden per device by
 * vmbus_dev_attr_is_visible().
 */
static struct attribute *vmbus_dev_attrs[] = {
	&dev_attr_id.attr,
	&dev_attr_state.attr,
	&dev_attr_monitor_id.attr,
	&dev_attr_class_id.attr,
	&dev_attr_device_id.attr,
	&dev_attr_modalias.attr,
#ifdef CONFIG_NUMA
	&dev_attr_numa_node.attr,
#endif
	&dev_attr_server_monitor_pending.attr,
	&dev_attr_client_monitor_pending.attr,
	&dev_attr_server_monitor_latency.attr,
	&dev_attr_client_monitor_latency.attr,
	&dev_attr_server_monitor_conn_id.attr,
	&dev_attr_client_monitor_conn_id.attr,
	&dev_attr_out_intr_mask.attr,
	&dev_attr_out_read_index.attr,
	&dev_attr_out_write_index.attr,
	&dev_attr_out_read_bytes_avail.attr,
	&dev_attr_out_write_bytes_avail.attr,
	&dev_attr_in_intr_mask.attr,
	&dev_attr_in_read_index.attr,
	&dev_attr_in_write_index.attr,
	&dev_attr_in_read_bytes_avail.attr,
	&dev_attr_in_write_bytes_avail.attr,
	&dev_attr_channel_vp_mapping.attr,
	&dev_attr_vendor.attr,
	&dev_attr_device.attr,
	&dev_attr_driver_override.attr,
	NULL,
};
46fc1548 KB |
647 | |
648 | /* | |
649 | * Device-level attribute_group callback function. Returns the permission for | |
650 | * each attribute, and returns 0 if an attribute is not visible. | |
651 | */ | |
652 | static umode_t vmbus_dev_attr_is_visible(struct kobject *kobj, | |
653 | struct attribute *attr, int idx) | |
654 | { | |
655 | struct device *dev = kobj_to_dev(kobj); | |
656 | const struct hv_device *hv_dev = device_to_hv_device(dev); | |
657 | ||
658 | /* Hide the monitor attributes if the monitor mechanism is not used. */ | |
659 | if (!hv_dev->channel->offermsg.monitor_allocated && | |
660 | (attr == &dev_attr_monitor_id.attr || | |
661 | attr == &dev_attr_server_monitor_pending.attr || | |
662 | attr == &dev_attr_client_monitor_pending.attr || | |
663 | attr == &dev_attr_server_monitor_latency.attr || | |
664 | attr == &dev_attr_client_monitor_latency.attr || | |
665 | attr == &dev_attr_server_monitor_conn_id.attr || | |
666 | attr == &dev_attr_client_monitor_conn_id.attr)) | |
667 | return 0; | |
668 | ||
669 | return attr->mode; | |
670 | } | |
671 | ||
/* Per-device attribute group: the attribute table plus its visibility filter */
static const struct attribute_group vmbus_dev_group = {
	.attrs = vmbus_dev_attrs,
	.is_visible = vmbus_dev_attr_is_visible
};
__ATTRIBUTE_GROUPS(vmbus_dev);
03f3a910 | 677 | |
adde2487 S |
678 | /* |
679 | * vmbus_uevent - add uevent for our device | |
680 | * | |
681 | * This routine is invoked when a device is added or removed on the vmbus to | |
682 | * generate a uevent to udev in the userspace. The udev will then look at its | |
683 | * rule and the uevent generated here to load the appropriate driver | |
0ddda660 S |
684 | * |
685 | * The alias string will be of the form vmbus:guid where guid is the string | |
686 | * representation of the device guid (each byte of the guid will be | |
687 | * represented with two hex characters. | |
adde2487 S |
688 | */ |
689 | static int vmbus_uevent(struct device *device, struct kobj_uevent_env *env) | |
690 | { | |
691 | struct hv_device *dev = device_to_hv_device(device); | |
0027e3fd | 692 | const char *format = "MODALIAS=vmbus:%*phN"; |
0ddda660 | 693 | |
0027e3fd | 694 | return add_uevent_var(env, format, UUID_SIZE, &dev->dev_type); |
adde2487 S |
695 | } |
696 | ||
d765edbb | 697 | static const struct hv_vmbus_device_id * |
593db803 | 698 | hv_vmbus_dev_match(const struct hv_vmbus_device_id *id, const guid_t *guid) |
d765edbb SH |
699 | { |
700 | if (id == NULL) | |
701 | return NULL; /* empty device table */ | |
702 | ||
593db803 AS |
703 | for (; !guid_is_null(&id->guid); id++) |
704 | if (guid_equal(&id->guid, guid)) | |
d765edbb SH |
705 | return id; |
706 | ||
707 | return NULL; | |
708 | } | |
709 | ||
710 | static const struct hv_vmbus_device_id * | |
593db803 | 711 | hv_vmbus_dynid_match(struct hv_driver *drv, const guid_t *guid) |
3037a7b6 | 712 | { |
fc76936d SH |
713 | const struct hv_vmbus_device_id *id = NULL; |
714 | struct vmbus_dynid *dynid; | |
715 | ||
fc76936d SH |
716 | spin_lock(&drv->dynids.lock); |
717 | list_for_each_entry(dynid, &drv->dynids.list, node) { | |
593db803 | 718 | if (guid_equal(&dynid->id.guid, guid)) { |
fc76936d SH |
719 | id = &dynid->id; |
720 | break; | |
721 | } | |
722 | } | |
723 | spin_unlock(&drv->dynids.lock); | |
724 | ||
d765edbb SH |
725 | return id; |
726 | } | |
fc76936d | 727 | |
593db803 | 728 | static const struct hv_vmbus_device_id vmbus_device_null; |
fc76936d | 729 | |
d765edbb SH |
730 | /* |
731 | * Return a matching hv_vmbus_device_id pointer. | |
732 | * If there is no match, return NULL. | |
733 | */ | |
734 | static const struct hv_vmbus_device_id *hv_vmbus_get_id(struct hv_driver *drv, | |
735 | struct hv_device *dev) | |
736 | { | |
593db803 | 737 | const guid_t *guid = &dev->dev_type; |
d765edbb | 738 | const struct hv_vmbus_device_id *id; |
3037a7b6 | 739 | |
d765edbb SH |
740 | /* When driver_override is set, only bind to the matching driver */ |
741 | if (dev->driver_override && strcmp(dev->driver_override, drv->name)) | |
742 | return NULL; | |
743 | ||
744 | /* Look at the dynamic ids first, before the static ones */ | |
745 | id = hv_vmbus_dynid_match(drv, guid); | |
746 | if (!id) | |
747 | id = hv_vmbus_dev_match(drv->id_table, guid); | |
748 | ||
749 | /* driver_override will always match, send a dummy id */ | |
750 | if (!id && dev->driver_override) | |
751 | id = &vmbus_device_null; | |
752 | ||
753 | return id; | |
3037a7b6 S |
754 | } |
755 | ||
fc76936d | 756 | /* vmbus_add_dynid - add a new device ID to this driver and re-probe devices */ |
593db803 | 757 | static int vmbus_add_dynid(struct hv_driver *drv, guid_t *guid) |
fc76936d SH |
758 | { |
759 | struct vmbus_dynid *dynid; | |
760 | ||
761 | dynid = kzalloc(sizeof(*dynid), GFP_KERNEL); | |
762 | if (!dynid) | |
763 | return -ENOMEM; | |
764 | ||
765 | dynid->id.guid = *guid; | |
766 | ||
767 | spin_lock(&drv->dynids.lock); | |
768 | list_add_tail(&dynid->node, &drv->dynids.list); | |
769 | spin_unlock(&drv->dynids.lock); | |
770 | ||
771 | return driver_attach(&drv->driver); | |
772 | } | |
773 | ||
774 | static void vmbus_free_dynids(struct hv_driver *drv) | |
775 | { | |
776 | struct vmbus_dynid *dynid, *n; | |
777 | ||
778 | spin_lock(&drv->dynids.lock); | |
779 | list_for_each_entry_safe(dynid, n, &drv->dynids.list, node) { | |
780 | list_del(&dynid->node); | |
781 | kfree(dynid); | |
782 | } | |
783 | spin_unlock(&drv->dynids.lock); | |
784 | } | |
785 | ||
fc76936d SH |
786 | /* |
787 | * store_new_id - sysfs frontend to vmbus_add_dynid() | |
788 | * | |
789 | * Allow GUIDs to be added to an existing driver via sysfs. | |
790 | */ | |
791 | static ssize_t new_id_store(struct device_driver *driver, const char *buf, | |
792 | size_t count) | |
793 | { | |
794 | struct hv_driver *drv = drv_to_hv_drv(driver); | |
593db803 | 795 | guid_t guid; |
fc76936d SH |
796 | ssize_t retval; |
797 | ||
593db803 | 798 | retval = guid_parse(buf, &guid); |
31100108 AS |
799 | if (retval) |
800 | return retval; | |
fc76936d | 801 | |
d765edbb | 802 | if (hv_vmbus_dynid_match(drv, &guid)) |
fc76936d SH |
803 | return -EEXIST; |
804 | ||
805 | retval = vmbus_add_dynid(drv, &guid); | |
806 | if (retval) | |
807 | return retval; | |
808 | return count; | |
809 | } | |
810 | static DRIVER_ATTR_WO(new_id); | |
811 | ||
812 | /* | |
813 | * store_remove_id - remove a PCI device ID from this driver | |
814 | * | |
815 | * Removes a dynamic pci device ID to this driver. | |
816 | */ | |
817 | static ssize_t remove_id_store(struct device_driver *driver, const char *buf, | |
818 | size_t count) | |
819 | { | |
820 | struct hv_driver *drv = drv_to_hv_drv(driver); | |
821 | struct vmbus_dynid *dynid, *n; | |
593db803 | 822 | guid_t guid; |
31100108 | 823 | ssize_t retval; |
fc76936d | 824 | |
593db803 | 825 | retval = guid_parse(buf, &guid); |
31100108 AS |
826 | if (retval) |
827 | return retval; | |
fc76936d | 828 | |
31100108 | 829 | retval = -ENODEV; |
fc76936d SH |
830 | spin_lock(&drv->dynids.lock); |
831 | list_for_each_entry_safe(dynid, n, &drv->dynids.list, node) { | |
832 | struct hv_vmbus_device_id *id = &dynid->id; | |
833 | ||
593db803 | 834 | if (guid_equal(&id->guid, &guid)) { |
fc76936d SH |
835 | list_del(&dynid->node); |
836 | kfree(dynid); | |
837 | retval = count; | |
838 | break; | |
839 | } | |
840 | } | |
841 | spin_unlock(&drv->dynids.lock); | |
842 | ||
843 | return retval; | |
844 | } | |
845 | static DRIVER_ATTR_WO(remove_id); | |
846 | ||
/* Per-driver sysfs attributes: the write-only new_id and remove_id files */
static struct attribute *vmbus_drv_attrs[] = {
	&driver_attr_new_id.attr,
	&driver_attr_remove_id.attr,
	NULL,
};
ATTRIBUTE_GROUPS(vmbus_drv);
3037a7b6 | 853 | |
b7fc147b S |
854 | |
855 | /* | |
856 | * vmbus_match - Attempt to match the specified device to the specified driver | |
857 | */ | |
858 | static int vmbus_match(struct device *device, struct device_driver *driver) | |
859 | { | |
b7fc147b | 860 | struct hv_driver *drv = drv_to_hv_drv(driver); |
e8e27047 | 861 | struct hv_device *hv_dev = device_to_hv_device(device); |
b7fc147b | 862 | |
8981da32 DC |
863 | /* The hv_sock driver handles all hv_sock offers. */ |
864 | if (is_hvsock_channel(hv_dev->channel)) | |
865 | return drv->hvsock; | |
866 | ||
d765edbb | 867 | if (hv_vmbus_get_id(drv, hv_dev)) |
3037a7b6 | 868 | return 1; |
de632a2b | 869 | |
5841a829 | 870 | return 0; |
b7fc147b S |
871 | } |
872 | ||
f1f0d67b S |
873 | /* |
874 | * vmbus_probe - Add the new vmbus's child device | |
875 | */ | |
876 | static int vmbus_probe(struct device *child_device) | |
877 | { | |
878 | int ret = 0; | |
879 | struct hv_driver *drv = | |
880 | drv_to_hv_drv(child_device->driver); | |
9efd21e1 | 881 | struct hv_device *dev = device_to_hv_device(child_device); |
84946899 | 882 | const struct hv_vmbus_device_id *dev_id; |
f1f0d67b | 883 | |
d765edbb | 884 | dev_id = hv_vmbus_get_id(drv, dev); |
9efd21e1 | 885 | if (drv->probe) { |
84946899 | 886 | ret = drv->probe(dev, dev_id); |
b14a7b30 | 887 | if (ret != 0) |
0a46618d HJ |
888 | pr_err("probe failed for device %s (%d)\n", |
889 | dev_name(child_device), ret); | |
f1f0d67b | 890 | |
f1f0d67b | 891 | } else { |
0a46618d HJ |
892 | pr_err("probe not set for driver %s\n", |
893 | dev_name(child_device)); | |
6de925b1 | 894 | ret = -ENODEV; |
f1f0d67b S |
895 | } |
896 | return ret; | |
897 | } | |
898 | ||
c5dce3db S |
899 | /* |
900 | * vmbus_remove - Remove a vmbus device | |
901 | */ | |
902 | static int vmbus_remove(struct device *child_device) | |
903 | { | |
d15a0301 | 904 | struct hv_driver *drv; |
415b023a | 905 | struct hv_device *dev = device_to_hv_device(child_device); |
c5dce3db | 906 | |
d15a0301 S |
907 | if (child_device->driver) { |
908 | drv = drv_to_hv_drv(child_device->driver); | |
909 | if (drv->remove) | |
910 | drv->remove(dev); | |
d15a0301 | 911 | } |
c5dce3db S |
912 | |
913 | return 0; | |
914 | } | |
915 | ||
eb1bb259 S |
916 | |
917 | /* | |
918 | * vmbus_shutdown - Shutdown a vmbus device | |
919 | */ | |
920 | static void vmbus_shutdown(struct device *child_device) | |
921 | { | |
922 | struct hv_driver *drv; | |
ca6887fb | 923 | struct hv_device *dev = device_to_hv_device(child_device); |
eb1bb259 S |
924 | |
925 | ||
926 | /* The device may not be attached yet */ | |
927 | if (!child_device->driver) | |
928 | return; | |
929 | ||
930 | drv = drv_to_hv_drv(child_device->driver); | |
931 | ||
ca6887fb S |
932 | if (drv->shutdown) |
933 | drv->shutdown(dev); | |
eb1bb259 S |
934 | } |
935 | ||
83b50f83 | 936 | #ifdef CONFIG_PM_SLEEP |
271b2224 DC |
937 | /* |
938 | * vmbus_suspend - Suspend a vmbus device | |
939 | */ | |
940 | static int vmbus_suspend(struct device *child_device) | |
941 | { | |
942 | struct hv_driver *drv; | |
943 | struct hv_device *dev = device_to_hv_device(child_device); | |
944 | ||
945 | /* The device may not be attached yet */ | |
946 | if (!child_device->driver) | |
947 | return 0; | |
948 | ||
949 | drv = drv_to_hv_drv(child_device->driver); | |
950 | if (!drv->suspend) | |
951 | return -EOPNOTSUPP; | |
952 | ||
953 | return drv->suspend(dev); | |
954 | } | |
955 | ||
956 | /* | |
957 | * vmbus_resume - Resume a vmbus device | |
958 | */ | |
959 | static int vmbus_resume(struct device *child_device) | |
960 | { | |
961 | struct hv_driver *drv; | |
962 | struct hv_device *dev = device_to_hv_device(child_device); | |
963 | ||
964 | /* The device may not be attached yet */ | |
965 | if (!child_device->driver) | |
966 | return 0; | |
967 | ||
968 | drv = drv_to_hv_drv(child_device->driver); | |
969 | if (!drv->resume) | |
970 | return -EOPNOTSUPP; | |
971 | ||
972 | return drv->resume(dev); | |
973 | } | |
1a06d017 DC |
974 | #else |
975 | #define vmbus_suspend NULL | |
976 | #define vmbus_resume NULL | |
83b50f83 | 977 | #endif /* CONFIG_PM_SLEEP */ |
086e7a56 S |
978 | |
979 | /* | |
980 | * vmbus_device_release - Final callback release of the vmbus child device | |
981 | */ | |
982 | static void vmbus_device_release(struct device *device) | |
983 | { | |
e8e27047 | 984 | struct hv_device *hv_dev = device_to_hv_device(device); |
34c6801e | 985 | struct vmbus_channel *channel = hv_dev->channel; |
086e7a56 | 986 | |
af9ca6f9 BB |
987 | hv_debug_rm_dev_dir(hv_dev); |
988 | ||
54a66265 | 989 | mutex_lock(&vmbus_connection.channel_mutex); |
800b9329 | 990 | hv_process_channel_removal(channel); |
54a66265 | 991 | mutex_unlock(&vmbus_connection.channel_mutex); |
e8e27047 | 992 | kfree(hv_dev); |
086e7a56 S |
993 | } |
994 | ||
271b2224 | 995 | /* |
1a06d017 DC |
996 | * Note: we must use the "noirq" ops: see the comment before vmbus_bus_pm. |
997 | * | |
998 | * suspend_noirq/resume_noirq are set to NULL to support Suspend-to-Idle: we | |
999 | * shouldn't suspend the vmbus devices upon Suspend-to-Idle, otherwise there | |
1000 | * is no way to wake up a Generation-2 VM. | |
1001 | * | |
1002 | * The other 4 ops are for hibernation. | |
271b2224 | 1003 | */ |
1a06d017 | 1004 | |
271b2224 | 1005 | static const struct dev_pm_ops vmbus_pm = { |
1a06d017 DC |
1006 | .suspend_noirq = NULL, |
1007 | .resume_noirq = NULL, | |
1008 | .freeze_noirq = vmbus_suspend, | |
1009 | .thaw_noirq = vmbus_resume, | |
1010 | .poweroff_noirq = vmbus_suspend, | |
1011 | .restore_noirq = vmbus_resume, | |
271b2224 DC |
1012 | }; |
1013 | ||
454f18a9 | 1014 | /* The one and only one */ |
9adcac5c S |
1015 | static struct bus_type hv_bus = { |
1016 | .name = "vmbus", | |
1017 | .match = vmbus_match, | |
1018 | .shutdown = vmbus_shutdown, | |
1019 | .remove = vmbus_remove, | |
1020 | .probe = vmbus_probe, | |
1021 | .uevent = vmbus_uevent, | |
fc76936d SH |
1022 | .dev_groups = vmbus_dev_groups, |
1023 | .drv_groups = vmbus_drv_groups, | |
271b2224 | 1024 | .pm = &vmbus_pm, |
3e7ee490 HJ |
1025 | }; |
1026 | ||
bf6506f6 TT |
1027 | struct onmessage_work_context { |
1028 | struct work_struct work; | |
a276463b VK |
1029 | struct { |
1030 | struct hv_message_header header; | |
1031 | u8 payload[]; | |
1032 | } msg; | |
bf6506f6 TT |
1033 | }; |
1034 | ||
1035 | static void vmbus_onmessage_work(struct work_struct *work) | |
1036 | { | |
1037 | struct onmessage_work_context *ctx; | |
1038 | ||
09a19628 VK |
1039 | /* Do not process messages if we're in DISCONNECTED state */ |
1040 | if (vmbus_connection.conn_state == DISCONNECTED) | |
1041 | return; | |
1042 | ||
bf6506f6 TT |
1043 | ctx = container_of(work, struct onmessage_work_context, |
1044 | work); | |
5cc41500 VK |
1045 | vmbus_onmessage((struct vmbus_channel_message_header *) |
1046 | &ctx->msg.payload); | |
bf6506f6 TT |
1047 | kfree(ctx); |
1048 | } | |
1049 | ||
d81274aa | 1050 | void vmbus_on_msg_dpc(unsigned long data) |
36199a99 | 1051 | { |
37cdd991 SH |
1052 | struct hv_per_cpu_context *hv_cpu = (void *)data; |
1053 | void *page_addr = hv_cpu->synic_message_page; | |
36199a99 GKH |
1054 | struct hv_message *msg = (struct hv_message *)page_addr + |
1055 | VMBUS_MESSAGE_SINT; | |
652594c7 | 1056 | struct vmbus_channel_message_header *hdr; |
e6242fa0 | 1057 | const struct vmbus_channel_message_table_entry *entry; |
bf6506f6 | 1058 | struct onmessage_work_context *ctx; |
cd95aad5 | 1059 | u32 message_type = msg->header.message_type; |
36199a99 | 1060 | |
b0a284dc VK |
1061 | /* |
1062 | * 'enum vmbus_channel_message_type' is supposed to always be 'u32' as | |
1063 | * it is being used in 'struct vmbus_channel_message_header' definition | |
1064 | * which is supposed to match hypervisor ABI. | |
1065 | */ | |
1066 | BUILD_BUG_ON(sizeof(enum vmbus_channel_message_type) != sizeof(u32)); | |
1067 | ||
cd95aad5 | 1068 | if (message_type == HVMSG_NONE) |
7be3e169 VK |
1069 | /* no msg */ |
1070 | return; | |
652594c7 | 1071 | |
7be3e169 | 1072 | hdr = (struct vmbus_channel_message_header *)msg->u.payload; |
652594c7 | 1073 | |
c9fe0f8f VK |
1074 | trace_vmbus_on_msg_dpc(hdr); |
1075 | ||
7be3e169 VK |
1076 | if (hdr->msgtype >= CHANNELMSG_COUNT) { |
1077 | WARN_ONCE(1, "unknown msgtype=%d\n", hdr->msgtype); | |
1078 | goto msg_handled; | |
1079 | } | |
652594c7 | 1080 | |
ac0f7d42 VK |
1081 | if (msg->header.payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT) { |
1082 | WARN_ONCE(1, "payload size is too large (%d)\n", | |
1083 | msg->header.payload_size); | |
1084 | goto msg_handled; | |
1085 | } | |
1086 | ||
7be3e169 | 1087 | entry = &channel_message_table[hdr->msgtype]; |
ddc9d357 DC |
1088 | |
1089 | if (!entry->message_handler) | |
1090 | goto msg_handled; | |
1091 | ||
52c7803f VK |
1092 | if (msg->header.payload_size < entry->min_payload_len) { |
1093 | WARN_ONCE(1, "message too short: msgtype=%d len=%d\n", | |
1094 | hdr->msgtype, msg->header.payload_size); | |
1095 | goto msg_handled; | |
1096 | } | |
1097 | ||
7be3e169 | 1098 | if (entry->handler_type == VMHT_BLOCKING) { |
a276463b VK |
1099 | ctx = kmalloc(sizeof(*ctx) + msg->header.payload_size, |
1100 | GFP_ATOMIC); | |
7be3e169 VK |
1101 | if (ctx == NULL) |
1102 | return; | |
652594c7 | 1103 | |
7be3e169 | 1104 | INIT_WORK(&ctx->work, vmbus_onmessage_work); |
ac0f7d42 VK |
1105 | memcpy(&ctx->msg, msg, sizeof(msg->header) + |
1106 | msg->header.payload_size); | |
652594c7 | 1107 | |
54a66265 S |
1108 | /* |
1109 | * The host can generate a rescind message while we | |
1110 | * may still be handling the original offer. We deal with | |
b9fa1b87 APM |
1111 | * this condition by relying on the synchronization provided |
1112 | * by offer_in_progress and by channel_mutex. See also the | |
1113 | * inline comments in vmbus_onoffer_rescind(). | |
54a66265 S |
1114 | */ |
1115 | switch (hdr->msgtype) { | |
1116 | case CHANNELMSG_RESCIND_CHANNELOFFER: | |
1117 | /* | |
1118 | * If we are handling the rescind message; | |
1119 | * schedule the work on the global work queue. | |
8a857c55 APM |
1120 | * |
1121 | * The OFFER message and the RESCIND message should | |
1122 | * not be handled by the same serialized work queue, | |
1123 | * because the OFFER handler may call vmbus_open(), | |
1124 | * which tries to open the channel by sending an | |
1125 | * OPEN_CHANNEL message to the host and waits for | |
1126 | * the host's response; however, if the host has | |
1127 | * rescinded the channel before it receives the | |
1128 | * OPEN_CHANNEL message, the host just silently | |
1129 | * ignores the OPEN_CHANNEL message; as a result, | |
1130 | * the guest's OFFER handler hangs for ever, if we | |
1131 | * handle the RESCIND message in the same serialized | |
1132 | * work queue: the RESCIND handler can not start to | |
1133 | * run before the OFFER handler finishes. | |
54a66265 | 1134 | */ |
b9fa1b87 | 1135 | schedule_work(&ctx->work); |
54a66265 S |
1136 | break; |
1137 | ||
1138 | case CHANNELMSG_OFFERCHANNEL: | |
b9fa1b87 APM |
1139 | /* |
1140 | * The host sends the offer message of a given channel | |
1141 | * before sending the rescind message of the same | |
1142 | * channel. These messages are sent to the guest's | |
1143 | * connect CPU; the guest then starts processing them | |
1144 | * in the tasklet handler on this CPU: | |
1145 | * | |
1146 | * VMBUS_CONNECT_CPU | |
1147 | * | |
1148 | * [vmbus_on_msg_dpc()] | |
1149 | * atomic_inc() // CHANNELMSG_OFFERCHANNEL | |
1150 | * queue_work() | |
1151 | * ... | |
1152 | * [vmbus_on_msg_dpc()] | |
1153 | * schedule_work() // CHANNELMSG_RESCIND_CHANNELOFFER | |
1154 | * | |
1155 | * We rely on the memory-ordering properties of the | |
1156 | * queue_work() and schedule_work() primitives, which | |
1157 | * guarantee that the atomic increment will be visible | |
1158 | * to the CPUs which will execute the offer & rescind | |
1159 | * works by the time these works will start execution. | |
1160 | */ | |
54a66265 | 1161 | atomic_inc(&vmbus_connection.offer_in_progress); |
b9fa1b87 | 1162 | fallthrough; |
54a66265 S |
1163 | |
1164 | default: | |
1165 | queue_work(vmbus_connection.work_queue, &ctx->work); | |
1166 | } | |
7be3e169 VK |
1167 | } else |
1168 | entry->message_handler(hdr); | |
36199a99 | 1169 | |
652594c7 | 1170 | msg_handled: |
cd95aad5 | 1171 | vmbus_signal_eom(msg, message_type); |
36199a99 GKH |
1172 | } |
1173 | ||
83b50f83 | 1174 | #ifdef CONFIG_PM_SLEEP |
1f48dcf1 DC |
1175 | /* |
1176 | * Fake RESCIND_CHANNEL messages to clean up hv_sock channels by force for | |
1177 | * hibernation, because hv_sock connections can not persist across hibernation. | |
1178 | */ | |
1179 | static void vmbus_force_channel_rescinded(struct vmbus_channel *channel) | |
1180 | { | |
1181 | struct onmessage_work_context *ctx; | |
1182 | struct vmbus_channel_rescind_offer *rescind; | |
1183 | ||
1184 | WARN_ON(!is_hvsock_channel(channel)); | |
1185 | ||
1186 | /* | |
a276463b | 1187 | * Allocation size is small and the allocation should really not fail, |
1f48dcf1 DC |
1188 | * otherwise the state of the hv_sock connections ends up in limbo. |
1189 | */ | |
a276463b VK |
1190 | ctx = kzalloc(sizeof(*ctx) + sizeof(*rescind), |
1191 | GFP_KERNEL | __GFP_NOFAIL); | |
1f48dcf1 DC |
1192 | |
1193 | /* | |
1194 | * So far, these are not really used by Linux. Just set them to the | |
1195 | * reasonable values conforming to the definitions of the fields. | |
1196 | */ | |
1197 | ctx->msg.header.message_type = 1; | |
1198 | ctx->msg.header.payload_size = sizeof(*rescind); | |
1199 | ||
1200 | /* These values are actually used by Linux. */ | |
a276463b | 1201 | rescind = (struct vmbus_channel_rescind_offer *)ctx->msg.payload; |
1f48dcf1 DC |
1202 | rescind->header.msgtype = CHANNELMSG_RESCIND_CHANNELOFFER; |
1203 | rescind->child_relid = channel->offermsg.child_relid; | |
1204 | ||
1205 | INIT_WORK(&ctx->work, vmbus_onmessage_work); | |
1206 | ||
b9fa1b87 | 1207 | queue_work(vmbus_connection.work_queue, &ctx->work); |
1f48dcf1 | 1208 | } |
83b50f83 | 1209 | #endif /* CONFIG_PM_SLEEP */ |
631e63a9 SH |
1210 | |
1211 | /* | |
1212 | * Schedule all channels with events pending | |
1213 | */ | |
1214 | static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu) | |
1215 | { | |
1216 | unsigned long *recv_int_page; | |
1217 | u32 maxbits, relid; | |
1218 | ||
1219 | if (vmbus_proto_version < VERSION_WIN8) { | |
1220 | maxbits = MAX_NUM_CHANNELS_SUPPORTED; | |
1221 | recv_int_page = vmbus_connection.recv_int_page; | |
1222 | } else { | |
1223 | /* | |
1224 | * When the host is win8 and beyond, the event page | |
1225 | * can be directly checked to get the id of the channel | |
1226 | * that has the interrupt pending. | |
1227 | */ | |
1228 | void *page_addr = hv_cpu->synic_event_page; | |
1229 | union hv_synic_event_flags *event | |
1230 | = (union hv_synic_event_flags *)page_addr + | |
1231 | VMBUS_MESSAGE_SINT; | |
1232 | ||
1233 | maxbits = HV_EVENT_FLAGS_COUNT; | |
1234 | recv_int_page = event->flags; | |
1235 | } | |
1236 | ||
1237 | if (unlikely(!recv_int_page)) | |
1238 | return; | |
1239 | ||
1240 | for_each_set_bit(relid, recv_int_page, maxbits) { | |
9403b66e | 1241 | void (*callback_fn)(void *context); |
631e63a9 SH |
1242 | struct vmbus_channel *channel; |
1243 | ||
1244 | if (!sync_test_and_clear_bit(relid, recv_int_page)) | |
1245 | continue; | |
1246 | ||
1247 | /* Special case - vmbus channel protocol msg */ | |
1248 | if (relid == 0) | |
1249 | continue; | |
1250 | ||
8b6a877c APM |
1251 | /* |
1252 | * Pairs with the kfree_rcu() in vmbus_chan_release(). | |
1253 | * Guarantees that the channel data structure doesn't | |
1254 | * get freed while the channel pointer below is being | |
1255 | * dereferenced. | |
1256 | */ | |
8200f208 SH |
1257 | rcu_read_lock(); |
1258 | ||
631e63a9 | 1259 | /* Find channel based on relid */ |
8b6a877c APM |
1260 | channel = relid2channel(relid); |
1261 | if (channel == NULL) | |
1262 | goto sched_unlock_rcu; | |
b71e3282 | 1263 | |
8b6a877c APM |
1264 | if (channel->rescind) |
1265 | goto sched_unlock_rcu; | |
6f3d791f | 1266 | |
9403b66e APM |
1267 | /* |
1268 | * Make sure that the ring buffer data structure doesn't get | |
1269 | * freed while we dereference the ring buffer pointer. Test | |
1270 | * for the channel's onchannel_callback being NULL within a | |
1271 | * sched_lock critical section. See also the inline comments | |
1272 | * in vmbus_reset_channel_cb(). | |
1273 | */ | |
1274 | spin_lock(&channel->sched_lock); | |
991f8f1c | 1275 | |
9403b66e APM |
1276 | callback_fn = channel->onchannel_callback; |
1277 | if (unlikely(callback_fn == NULL)) | |
1278 | goto sched_unlock; | |
6981fbf3 | 1279 | |
8b6a877c | 1280 | trace_vmbus_chan_sched(channel); |
b71e3282 | 1281 | |
8b6a877c | 1282 | ++channel->interrupts; |
6981fbf3 | 1283 | |
8b6a877c APM |
1284 | switch (channel->callback_mode) { |
1285 | case HV_CALL_ISR: | |
9403b66e | 1286 | (*callback_fn)(channel->channel_callback_context); |
8b6a877c | 1287 | break; |
b71e3282 | 1288 | |
8b6a877c APM |
1289 | case HV_CALL_BATCHED: |
1290 | hv_begin_read(&channel->inbound); | |
1291 | fallthrough; | |
1292 | case HV_CALL_DIRECT: | |
1293 | tasklet_schedule(&channel->callback_event); | |
631e63a9 | 1294 | } |
8200f208 | 1295 | |
9403b66e APM |
1296 | sched_unlock: |
1297 | spin_unlock(&channel->sched_lock); | |
8b6a877c | 1298 | sched_unlock_rcu: |
8200f208 | 1299 | rcu_read_unlock(); |
631e63a9 SH |
1300 | } |
1301 | } | |
1302 | ||
76d388cd | 1303 | static void vmbus_isr(void) |
36199a99 | 1304 | { |
37cdd991 SH |
1305 | struct hv_per_cpu_context *hv_cpu |
1306 | = this_cpu_ptr(hv_context.cpu_context); | |
1307 | void *page_addr = hv_cpu->synic_event_page; | |
36199a99 GKH |
1308 | struct hv_message *msg; |
1309 | union hv_synic_event_flags *event; | |
ae4636e6 | 1310 | bool handled = false; |
36199a99 | 1311 | |
37cdd991 | 1312 | if (unlikely(page_addr == NULL)) |
76d388cd | 1313 | return; |
5ab05951 S |
1314 | |
1315 | event = (union hv_synic_event_flags *)page_addr + | |
1316 | VMBUS_MESSAGE_SINT; | |
7341d908 S |
1317 | /* |
1318 | * Check for events before checking for messages. This is the order | |
1319 | * in which events and messages are checked in Windows guests on | |
1320 | * Hyper-V, and the Windows team suggested we do the same. | |
1321 | */ | |
36199a99 | 1322 | |
6552ecd7 S |
1323 | if ((vmbus_proto_version == VERSION_WS2008) || |
1324 | (vmbus_proto_version == VERSION_WIN7)) { | |
36199a99 | 1325 | |
6552ecd7 | 1326 | /* Since we are a child, we only need to check bit 0 */ |
5c1bec61 | 1327 | if (sync_test_and_clear_bit(0, event->flags)) |
6552ecd7 | 1328 | handled = true; |
6552ecd7 S |
1329 | } else { |
1330 | /* | |
1331 | * Our host is win8 or above. The signaling mechanism | |
1332 | * has changed and we can directly look at the event page. | |
1333 | * If bit n is set then we have an interrup on the channel | |
1334 | * whose id is n. | |
1335 | */ | |
ae4636e6 | 1336 | handled = true; |
ae4636e6 | 1337 | } |
793be9c7 | 1338 | |
6552ecd7 | 1339 | if (handled) |
631e63a9 | 1340 | vmbus_chan_sched(hv_cpu); |
6552ecd7 | 1341 | |
37cdd991 | 1342 | page_addr = hv_cpu->synic_message_page; |
7341d908 S |
1343 | msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT; |
1344 | ||
1345 | /* Check if there are actual msgs to be processed */ | |
4061ed9e | 1346 | if (msg->header.message_type != HVMSG_NONE) { |
fd1fea68 MK |
1347 | if (msg->header.message_type == HVMSG_TIMER_EXPIRED) { |
1348 | hv_stimer0_isr(); | |
1349 | vmbus_signal_eom(msg, HVMSG_TIMER_EXPIRED); | |
1350 | } else | |
37cdd991 | 1351 | tasklet_schedule(&hv_cpu->msg_dpc); |
4061ed9e | 1352 | } |
4b44f2d1 | 1353 | |
626b901f | 1354 | add_interrupt_randomness(hv_get_vector(), 0); |
793be9c7 S |
1355 | } |
1356 | ||
81b18bce SM |
1357 | /* |
1358 | * Callback from kmsg_dump. Grab as much as possible from the end of the kmsg | |
1359 | * buffer and call into Hyper-V to transfer the data. | |
1360 | */ | |
1361 | static void hv_kmsg_dump(struct kmsg_dumper *dumper, | |
1362 | enum kmsg_dump_reason reason) | |
1363 | { | |
1364 | size_t bytes_written; | |
1365 | phys_addr_t panic_pa; | |
1366 | ||
1367 | /* We are only interested in panics. */ | |
1368 | if ((reason != KMSG_DUMP_PANIC) || (!sysctl_record_panic_msg)) | |
1369 | return; | |
1370 | ||
1371 | panic_pa = virt_to_phys(hv_panic_page); | |
1372 | ||
1373 | /* | |
1374 | * Write dump contents to the page. No need to synchronize; panic should | |
1375 | * be single-threaded. | |
1376 | */ | |
77b48bea | 1377 | kmsg_dump_get_buffer(dumper, false, hv_panic_page, HV_HYP_PAGE_SIZE, |
ddcaf3ca SM |
1378 | &bytes_written); |
1379 | if (bytes_written) | |
1380 | hyperv_report_panic_msg(panic_pa, bytes_written); | |
81b18bce SM |
1381 | } |
1382 | ||
1383 | static struct kmsg_dumper hv_kmsg_dumper = { | |
1384 | .dump = hv_kmsg_dump, | |
1385 | }; | |
1386 | ||
1387 | static struct ctl_table_header *hv_ctl_table_hdr; | |
81b18bce SM |
1388 | |
1389 | /* | |
1390 | * sysctl option to allow the user to control whether kmsg data should be | |
1391 | * reported to Hyper-V on panic. | |
1392 | */ | |
1393 | static struct ctl_table hv_ctl_table[] = { | |
1394 | { | |
1395 | .procname = "hyperv_record_panic_msg", | |
1396 | .data = &sysctl_record_panic_msg, | |
1397 | .maxlen = sizeof(int), | |
1398 | .mode = 0644, | |
1399 | .proc_handler = proc_dointvec_minmax, | |
eec4844f MC |
1400 | .extra1 = SYSCTL_ZERO, |
1401 | .extra2 = SYSCTL_ONE | |
81b18bce SM |
1402 | }, |
1403 | {} | |
1404 | }; | |
1405 | ||
1406 | static struct ctl_table hv_root_table[] = { | |
1407 | { | |
1408 | .procname = "kernel", | |
1409 | .mode = 0555, | |
1410 | .child = hv_ctl_table | |
1411 | }, | |
1412 | {} | |
1413 | }; | |
e513229b | 1414 | |
3e189519 | 1415 | /* |
90c9960e GKH |
1416 | * vmbus_bus_init -Main vmbus driver initialization routine. |
1417 | * | |
1418 | * Here, we | |
0686e4f4 | 1419 | * - initialize the vmbus driver context |
0686e4f4 | 1420 | * - invoke the vmbus hv main init routine |
0686e4f4 | 1421 | * - retrieve the channel offers |
90c9960e | 1422 | */ |
efc26722 | 1423 | static int vmbus_bus_init(void) |
3e7ee490 | 1424 | { |
90c9960e | 1425 | int ret; |
3e7ee490 | 1426 | |
6d26e38f | 1427 | ret = hv_init(); |
90c9960e | 1428 | if (ret != 0) { |
0a46618d | 1429 | pr_err("Unable to initialize the hypervisor - 0x%x\n", ret); |
d6c1c5de | 1430 | return ret; |
3e7ee490 HJ |
1431 | } |
1432 | ||
9adcac5c | 1433 | ret = bus_register(&hv_bus); |
d6c1c5de | 1434 | if (ret) |
d6f3609d | 1435 | return ret; |
3e7ee490 | 1436 | |
626b901f MK |
1437 | ret = hv_setup_vmbus_irq(vmbus_irq, vmbus_isr); |
1438 | if (ret) | |
1439 | goto err_setup; | |
3e7ee490 | 1440 | |
2608fb65 JW |
1441 | ret = hv_synic_alloc(); |
1442 | if (ret) | |
1443 | goto err_alloc; | |
fd1fea68 | 1444 | |
800b6902 | 1445 | /* |
fd1fea68 MK |
1446 | * Initialize the per-cpu interrupt state and stimer state. |
1447 | * Then connect to the host. | |
800b6902 | 1448 | */ |
4a5f3cde | 1449 | ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online", |
76d36ab7 VK |
1450 | hv_synic_init, hv_synic_cleanup); |
1451 | if (ret < 0) | |
fd1fea68 | 1452 | goto err_cpuhp; |
76d36ab7 VK |
1453 | hyperv_cpuhp_online = ret; |
1454 | ||
800b6902 | 1455 | ret = vmbus_connect(); |
8b9987e9 | 1456 | if (ret) |
17efbee8 | 1457 | goto err_connect; |
800b6902 | 1458 | |
96c1d058 NM |
1459 | /* |
1460 | * Only register if the crash MSRs are available | |
1461 | */ | |
cc2dd402 | 1462 | if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { |
81b18bce SM |
1463 | u64 hyperv_crash_ctl; |
1464 | /* | |
1465 | * Sysctl registration is not fatal, since by default | |
1466 | * reporting is enabled. | |
1467 | */ | |
1468 | hv_ctl_table_hdr = register_sysctl_table(hv_root_table); | |
1469 | if (!hv_ctl_table_hdr) | |
1470 | pr_err("Hyper-V: sysctl table register error"); | |
1471 | ||
1472 | /* | |
1473 | * Register for panic kmsg callback only if the right | |
1474 | * capability is supported by the hypervisor. | |
1475 | */ | |
9d9c9656 | 1476 | hv_get_crash_ctl(hyperv_crash_ctl); |
81b18bce | 1477 | if (hyperv_crash_ctl & HV_CRASH_CTL_CRASH_NOTIFY_MSG) { |
53edce00 | 1478 | hv_panic_page = (void *)hv_alloc_hyperv_zeroed_page(); |
81b18bce SM |
1479 | if (hv_panic_page) { |
1480 | ret = kmsg_dump_register(&hv_kmsg_dumper); | |
7f11a2cc | 1481 | if (ret) { |
81b18bce SM |
1482 | pr_err("Hyper-V: kmsg dump register " |
1483 | "error 0x%x\n", ret); | |
7f11a2cc TL |
1484 | hv_free_hyperv_page( |
1485 | (unsigned long)hv_panic_page); | |
1486 | hv_panic_page = NULL; | |
1487 | } | |
81b18bce SM |
1488 | } else |
1489 | pr_err("Hyper-V: panic message page memory " | |
1490 | "allocation failed"); | |
1491 | } | |
1492 | ||
510f7aef | 1493 | register_die_notifier(&hyperv_die_block); |
96c1d058 NM |
1494 | } |
1495 | ||
74347a99 TL |
1496 | /* |
1497 | * Always register the panic notifier because we need to unload | |
1498 | * the VMbus channel connection to prevent any VMbus | |
1499 | * activity after the VM panics. | |
1500 | */ | |
1501 | atomic_notifier_chain_register(&panic_notifier_list, | |
1502 | &hyperv_panic_block); | |
1503 | ||
2d6e882b | 1504 | vmbus_request_offers(); |
8b5d6d3b | 1505 | |
d6c1c5de | 1506 | return 0; |
8b9987e9 | 1507 | |
17efbee8 | 1508 | err_connect: |
76d36ab7 | 1509 | cpuhp_remove_state(hyperv_cpuhp_online); |
fd1fea68 | 1510 | err_cpuhp: |
2608fb65 | 1511 | hv_synic_free(); |
4df4cb9e | 1512 | err_alloc: |
76d388cd | 1513 | hv_remove_vmbus_irq(); |
626b901f | 1514 | err_setup: |
8b9987e9 | 1515 | bus_unregister(&hv_bus); |
8afc06dd SM |
1516 | unregister_sysctl_table(hv_ctl_table_hdr); |
1517 | hv_ctl_table_hdr = NULL; | |
8b9987e9 | 1518 | return ret; |
3e7ee490 HJ |
1519 | } |
1520 | ||
90c9960e | 1521 | /** |
35464483 JO |
1522 | * __vmbus_child_driver_register() - Register a vmbus's driver |
1523 | * @hv_driver: Pointer to driver structure you want to register | |
768fa219 GKH |
1524 | * @owner: owner module of the drv |
1525 | * @mod_name: module name string | |
3e189519 HJ |
1526 | * |
1527 | * Registers the given driver with Linux through the 'driver_register()' call | |
768fa219 | 1528 | * and sets up the hyper-v vmbus handling for this driver. |
3e189519 HJ |
1529 | * It will return the state of the 'driver_register()' call. |
1530 | * | |
90c9960e | 1531 | */ |
768fa219 | 1532 | int __vmbus_driver_register(struct hv_driver *hv_driver, struct module *owner, const char *mod_name) |
3e7ee490 | 1533 | { |
5d48a1c2 | 1534 | int ret; |
3e7ee490 | 1535 | |
768fa219 | 1536 | pr_info("registering driver %s\n", hv_driver->name); |
3e7ee490 | 1537 | |
cf6a2eac S |
1538 | ret = vmbus_exists(); |
1539 | if (ret < 0) | |
1540 | return ret; | |
1541 | ||
768fa219 GKH |
1542 | hv_driver->driver.name = hv_driver->name; |
1543 | hv_driver->driver.owner = owner; | |
1544 | hv_driver->driver.mod_name = mod_name; | |
1545 | hv_driver->driver.bus = &hv_bus; | |
3e7ee490 | 1546 | |
fc76936d SH |
1547 | spin_lock_init(&hv_driver->dynids.lock); |
1548 | INIT_LIST_HEAD(&hv_driver->dynids.list); | |
1549 | ||
768fa219 | 1550 | ret = driver_register(&hv_driver->driver); |
3e7ee490 | 1551 | |
5d48a1c2 | 1552 | return ret; |
3e7ee490 | 1553 | } |
768fa219 | 1554 | EXPORT_SYMBOL_GPL(__vmbus_driver_register); |
3e7ee490 | 1555 | |
90c9960e | 1556 | /** |
768fa219 | 1557 | * vmbus_driver_unregister() - Unregister a vmbus's driver |
35464483 JO |
1558 | * @hv_driver: Pointer to driver structure you want to |
1559 | * un-register | |
3e189519 | 1560 | * |
768fa219 GKH |
1561 | * Un-register the given driver that was previous registered with a call to |
1562 | * vmbus_driver_register() | |
90c9960e | 1563 | */ |
768fa219 | 1564 | void vmbus_driver_unregister(struct hv_driver *hv_driver) |
3e7ee490 | 1565 | { |
768fa219 | 1566 | pr_info("unregistering driver %s\n", hv_driver->name); |
3e7ee490 | 1567 | |
fc76936d | 1568 | if (!vmbus_exists()) { |
8f257a14 | 1569 | driver_unregister(&hv_driver->driver); |
fc76936d SH |
1570 | vmbus_free_dynids(hv_driver); |
1571 | } | |
3e7ee490 | 1572 | } |
768fa219 | 1573 | EXPORT_SYMBOL_GPL(vmbus_driver_unregister); |
3e7ee490 | 1574 | |
c2e5df61 SH |
1575 | |
1576 | /* | |
1577 | * Called when last reference to channel is gone. | |
1578 | */ | |
1579 | static void vmbus_chan_release(struct kobject *kobj) | |
1580 | { | |
1581 | struct vmbus_channel *channel | |
1582 | = container_of(kobj, struct vmbus_channel, kobj); | |
1583 | ||
1584 | kfree_rcu(channel, rcu); | |
1585 | } | |
1586 | ||
1587 | struct vmbus_chan_attribute { | |
1588 | struct attribute attr; | |
14948e39 | 1589 | ssize_t (*show)(struct vmbus_channel *chan, char *buf); |
c2e5df61 SH |
1590 | ssize_t (*store)(struct vmbus_channel *chan, |
1591 | const char *buf, size_t count); | |
1592 | }; | |
1593 | #define VMBUS_CHAN_ATTR(_name, _mode, _show, _store) \ | |
1594 | struct vmbus_chan_attribute chan_attr_##_name \ | |
1595 | = __ATTR(_name, _mode, _show, _store) | |
1596 | #define VMBUS_CHAN_ATTR_RW(_name) \ | |
1597 | struct vmbus_chan_attribute chan_attr_##_name = __ATTR_RW(_name) | |
1598 | #define VMBUS_CHAN_ATTR_RO(_name) \ | |
1599 | struct vmbus_chan_attribute chan_attr_##_name = __ATTR_RO(_name) | |
1600 | #define VMBUS_CHAN_ATTR_WO(_name) \ | |
1601 | struct vmbus_chan_attribute chan_attr_##_name = __ATTR_WO(_name) | |
1602 | ||
1603 | static ssize_t vmbus_chan_attr_show(struct kobject *kobj, | |
1604 | struct attribute *attr, char *buf) | |
1605 | { | |
1606 | const struct vmbus_chan_attribute *attribute | |
1607 | = container_of(attr, struct vmbus_chan_attribute, attr); | |
14948e39 | 1608 | struct vmbus_channel *chan |
c2e5df61 SH |
1609 | = container_of(kobj, struct vmbus_channel, kobj); |
1610 | ||
1611 | if (!attribute->show) | |
1612 | return -EIO; | |
1613 | ||
1614 | return attribute->show(chan, buf); | |
1615 | } | |
1616 | ||
75278105 APM |
1617 | static ssize_t vmbus_chan_attr_store(struct kobject *kobj, |
1618 | struct attribute *attr, const char *buf, | |
1619 | size_t count) | |
1620 | { | |
1621 | const struct vmbus_chan_attribute *attribute | |
1622 | = container_of(attr, struct vmbus_chan_attribute, attr); | |
1623 | struct vmbus_channel *chan | |
1624 | = container_of(kobj, struct vmbus_channel, kobj); | |
1625 | ||
1626 | if (!attribute->store) | |
1627 | return -EIO; | |
1628 | ||
1629 | return attribute->store(chan, buf, count); | |
1630 | } | |
1631 | ||
c2e5df61 SH |
1632 | static const struct sysfs_ops vmbus_chan_sysfs_ops = { |
1633 | .show = vmbus_chan_attr_show, | |
75278105 | 1634 | .store = vmbus_chan_attr_store, |
c2e5df61 SH |
1635 | }; |
1636 | ||
14948e39 | 1637 | static ssize_t out_mask_show(struct vmbus_channel *channel, char *buf) |
c2e5df61 | 1638 | { |
14948e39 KB |
1639 | struct hv_ring_buffer_info *rbi = &channel->outbound; |
1640 | ssize_t ret; | |
c2e5df61 | 1641 | |
14948e39 KB |
1642 | mutex_lock(&rbi->ring_buffer_mutex); |
1643 | if (!rbi->ring_buffer) { | |
1644 | mutex_unlock(&rbi->ring_buffer_mutex); | |
fcedbb29 | 1645 | return -EINVAL; |
14948e39 | 1646 | } |
fcedbb29 | 1647 | |
14948e39 KB |
1648 | ret = sprintf(buf, "%u\n", rbi->ring_buffer->interrupt_mask); |
1649 | mutex_unlock(&rbi->ring_buffer_mutex); | |
1650 | return ret; | |
c2e5df61 | 1651 | } |
875c362b | 1652 | static VMBUS_CHAN_ATTR_RO(out_mask); |
c2e5df61 | 1653 | |
14948e39 | 1654 | static ssize_t in_mask_show(struct vmbus_channel *channel, char *buf) |
c2e5df61 | 1655 | { |
14948e39 KB |
1656 | struct hv_ring_buffer_info *rbi = &channel->inbound; |
1657 | ssize_t ret; | |
c2e5df61 | 1658 | |
14948e39 KB |
1659 | mutex_lock(&rbi->ring_buffer_mutex); |
1660 | if (!rbi->ring_buffer) { | |
1661 | mutex_unlock(&rbi->ring_buffer_mutex); | |
fcedbb29 | 1662 | return -EINVAL; |
14948e39 | 1663 | } |
fcedbb29 | 1664 | |
14948e39 KB |
1665 | ret = sprintf(buf, "%u\n", rbi->ring_buffer->interrupt_mask); |
1666 | mutex_unlock(&rbi->ring_buffer_mutex); | |
1667 | return ret; | |
c2e5df61 | 1668 | } |
875c362b | 1669 | static VMBUS_CHAN_ATTR_RO(in_mask); |
c2e5df61 | 1670 | |
14948e39 | 1671 | static ssize_t read_avail_show(struct vmbus_channel *channel, char *buf) |
c2e5df61 | 1672 | { |
14948e39 KB |
1673 | struct hv_ring_buffer_info *rbi = &channel->inbound; |
1674 | ssize_t ret; | |
c2e5df61 | 1675 | |
14948e39 KB |
1676 | mutex_lock(&rbi->ring_buffer_mutex); |
1677 | if (!rbi->ring_buffer) { | |
1678 | mutex_unlock(&rbi->ring_buffer_mutex); | |
fcedbb29 | 1679 | return -EINVAL; |
14948e39 | 1680 | } |
fcedbb29 | 1681 | |
14948e39 KB |
1682 | ret = sprintf(buf, "%u\n", hv_get_bytes_to_read(rbi)); |
1683 | mutex_unlock(&rbi->ring_buffer_mutex); | |
1684 | return ret; | |
c2e5df61 | 1685 | } |
875c362b | 1686 | static VMBUS_CHAN_ATTR_RO(read_avail); |
c2e5df61 | 1687 | |
14948e39 | 1688 | static ssize_t write_avail_show(struct vmbus_channel *channel, char *buf) |
c2e5df61 | 1689 | { |
14948e39 KB |
1690 | struct hv_ring_buffer_info *rbi = &channel->outbound; |
1691 | ssize_t ret; | |
c2e5df61 | 1692 | |
14948e39 KB |
1693 | mutex_lock(&rbi->ring_buffer_mutex); |
1694 | if (!rbi->ring_buffer) { | |
1695 | mutex_unlock(&rbi->ring_buffer_mutex); | |
fcedbb29 | 1696 | return -EINVAL; |
14948e39 | 1697 | } |
fcedbb29 | 1698 | |
14948e39 KB |
1699 | ret = sprintf(buf, "%u\n", hv_get_bytes_to_write(rbi)); |
1700 | mutex_unlock(&rbi->ring_buffer_mutex); | |
1701 | return ret; | |
c2e5df61 | 1702 | } |
875c362b | 1703 | static VMBUS_CHAN_ATTR_RO(write_avail); |
c2e5df61 | 1704 | |
75278105 | 1705 | static ssize_t target_cpu_show(struct vmbus_channel *channel, char *buf) |
c2e5df61 SH |
1706 | { |
1707 | return sprintf(buf, "%u\n", channel->target_cpu); | |
1708 | } | |
75278105 APM |
1709 | static ssize_t target_cpu_store(struct vmbus_channel *channel, |
1710 | const char *buf, size_t count) | |
1711 | { | |
afaa33da | 1712 | u32 target_cpu, origin_cpu; |
75278105 | 1713 | ssize_t ret = count; |
75278105 APM |
1714 | |
1715 | if (vmbus_proto_version < VERSION_WIN10_V4_1) | |
1716 | return -EIO; | |
1717 | ||
1718 | if (sscanf(buf, "%uu", &target_cpu) != 1) | |
1719 | return -EIO; | |
1720 | ||
1721 | /* Validate target_cpu for the cpumask_test_cpu() operation below. */ | |
1722 | if (target_cpu >= nr_cpumask_bits) | |
1723 | return -EINVAL; | |
1724 | ||
1725 | /* No CPUs should come up or down during this. */ | |
1726 | cpus_read_lock(); | |
1727 | ||
0a968209 | 1728 | if (!cpu_online(target_cpu)) { |
75278105 APM |
1729 | cpus_read_unlock(); |
1730 | return -EINVAL; | |
1731 | } | |
1732 | ||
1733 | /* | |
1734 | * Synchronizes target_cpu_store() and channel closure: | |
1735 | * | |
1736 | * { Initially: state = CHANNEL_OPENED } | |
1737 | * | |
1738 | * CPU1 CPU2 | |
1739 | * | |
1740 | * [target_cpu_store()] [vmbus_disconnect_ring()] | |
1741 | * | |
1742 | * LOCK channel_mutex LOCK channel_mutex | |
1743 | * LOAD r1 = state LOAD r2 = state | |
1744 | * IF (r1 == CHANNEL_OPENED) IF (r2 == CHANNEL_OPENED) | |
1745 | * SEND MODIFYCHANNEL STORE state = CHANNEL_OPEN | |
1746 | * [...] SEND CLOSECHANNEL | |
1747 | * UNLOCK channel_mutex UNLOCK channel_mutex | |
1748 | * | |
1749 | * Forbids: r1 == r2 == CHANNEL_OPENED (i.e., CPU1's LOCK precedes | |
1750 | * CPU2's LOCK) && CPU2's SEND precedes CPU1's SEND | |
1751 | * | |
1752 | * Note. The host processes the channel messages "sequentially", in | |
1753 | * the order in which they are received on a per-partition basis. | |
1754 | */ | |
1755 | mutex_lock(&vmbus_connection.channel_mutex); | |
1756 | ||
1757 | /* | |
1758 | * Hyper-V will ignore MODIFYCHANNEL messages for "non-open" channels; | |
1759 | * avoid sending the message and fail here for such channels. | |
1760 | */ | |
1761 | if (channel->state != CHANNEL_OPENED_STATE) { | |
1762 | ret = -EIO; | |
1763 | goto cpu_store_unlock; | |
1764 | } | |
1765 | ||
afaa33da APM |
1766 | origin_cpu = channel->target_cpu; |
1767 | if (target_cpu == origin_cpu) | |
75278105 APM |
1768 | goto cpu_store_unlock; |
1769 | ||
1770 | if (vmbus_send_modifychannel(channel->offermsg.child_relid, | |
1771 | hv_cpu_number_to_vp_number(target_cpu))) { | |
1772 | ret = -EIO; | |
1773 | goto cpu_store_unlock; | |
1774 | } | |
1775 | ||
1776 | /* | |
1777 | * Warning. At this point, there is *no* guarantee that the host will | |
1778 | * have successfully processed the vmbus_send_modifychannel() request. | |
1779 | * See the header comment of vmbus_send_modifychannel() for more info. | |
1780 | * | |
1781 | * Lags in the processing of the above vmbus_send_modifychannel() can | |
1782 | * result in missed interrupts if the "old" target CPU is taken offline | |
1783 | * before Hyper-V starts sending interrupts to the "new" target CPU. | |
1784 | * But apart from this offlining scenario, the code tolerates such | |
1785 | * lags. It will function correctly even if a channel interrupt comes | |
1786 | * in on a CPU that is different from the channel target_cpu value. | |
1787 | */ | |
1788 | ||
1789 | channel->target_cpu = target_cpu; | |
75278105 | 1790 | |
afaa33da APM |
1791 | /* See init_vp_index(). */ |
1792 | if (hv_is_perf_channel(channel)) | |
1793 | hv_update_alloced_cpus(origin_cpu, target_cpu); | |
1794 | ||
1795 | /* Currently set only for storvsc channels. */ | |
1796 | if (channel->change_target_cpu_callback) { | |
1797 | (*channel->change_target_cpu_callback)(channel, | |
1798 | origin_cpu, target_cpu); | |
1799 | } | |
1800 | ||
75278105 APM |
1801 | cpu_store_unlock: |
1802 | mutex_unlock(&vmbus_connection.channel_mutex); | |
1803 | cpus_read_unlock(); | |
1804 | return ret; | |
1805 | } | |
1806 | static VMBUS_CHAN_ATTR(cpu, 0644, target_cpu_show, target_cpu_store); | |
c2e5df61 | 1807 | |
14948e39 | 1808 | static ssize_t channel_pending_show(struct vmbus_channel *channel, |
c2e5df61 SH |
1809 | char *buf) |
1810 | { | |
1811 | return sprintf(buf, "%d\n", | |
1812 | channel_pending(channel, | |
1813 | vmbus_connection.monitor_pages[1])); | |
1814 | } | |
875c362b | 1815 | static VMBUS_CHAN_ATTR(pending, S_IRUGO, channel_pending_show, NULL); |
c2e5df61 | 1816 | |
14948e39 | 1817 | static ssize_t channel_latency_show(struct vmbus_channel *channel, |
c2e5df61 SH |
1818 | char *buf) |
1819 | { | |
1820 | return sprintf(buf, "%d\n", | |
1821 | channel_latency(channel, | |
1822 | vmbus_connection.monitor_pages[1])); | |
1823 | } | |
875c362b | 1824 | static VMBUS_CHAN_ATTR(latency, S_IRUGO, channel_latency_show, NULL); |
c2e5df61 | 1825 | |
14948e39 | 1826 | static ssize_t channel_interrupts_show(struct vmbus_channel *channel, char *buf) |
6981fbf3 SH |
1827 | { |
1828 | return sprintf(buf, "%llu\n", channel->interrupts); | |
1829 | } | |
875c362b | 1830 | static VMBUS_CHAN_ATTR(interrupts, S_IRUGO, channel_interrupts_show, NULL); |
6981fbf3 | 1831 | |
14948e39 | 1832 | static ssize_t channel_events_show(struct vmbus_channel *channel, char *buf) |
6981fbf3 SH |
1833 | { |
1834 | return sprintf(buf, "%llu\n", channel->sig_events); | |
1835 | } | |
875c362b | 1836 | static VMBUS_CHAN_ATTR(events, S_IRUGO, channel_events_show, NULL); |
6981fbf3 | 1837 | |
14948e39 | 1838 | static ssize_t channel_intr_in_full_show(struct vmbus_channel *channel, |
396ae57e KB |
1839 | char *buf) |
1840 | { | |
1841 | return sprintf(buf, "%llu\n", | |
1842 | (unsigned long long)channel->intr_in_full); | |
1843 | } | |
1844 | static VMBUS_CHAN_ATTR(intr_in_full, 0444, channel_intr_in_full_show, NULL); | |
1845 | ||
14948e39 | 1846 | static ssize_t channel_intr_out_empty_show(struct vmbus_channel *channel, |
396ae57e KB |
1847 | char *buf) |
1848 | { | |
1849 | return sprintf(buf, "%llu\n", | |
1850 | (unsigned long long)channel->intr_out_empty); | |
1851 | } | |
1852 | static VMBUS_CHAN_ATTR(intr_out_empty, 0444, channel_intr_out_empty_show, NULL); | |
1853 | ||
14948e39 | 1854 | static ssize_t channel_out_full_first_show(struct vmbus_channel *channel, |
396ae57e KB |
1855 | char *buf) |
1856 | { | |
1857 | return sprintf(buf, "%llu\n", | |
1858 | (unsigned long long)channel->out_full_first); | |
1859 | } | |
1860 | static VMBUS_CHAN_ATTR(out_full_first, 0444, channel_out_full_first_show, NULL); | |
1861 | ||
14948e39 | 1862 | static ssize_t channel_out_full_total_show(struct vmbus_channel *channel, |
396ae57e KB |
1863 | char *buf) |
1864 | { | |
1865 | return sprintf(buf, "%llu\n", | |
1866 | (unsigned long long)channel->out_full_total); | |
1867 | } | |
1868 | static VMBUS_CHAN_ATTR(out_full_total, 0444, channel_out_full_total_show, NULL); | |
1869 | ||
14948e39 | 1870 | static ssize_t subchannel_monitor_id_show(struct vmbus_channel *channel, |
f0fa2974 SH |
1871 | char *buf) |
1872 | { | |
1873 | return sprintf(buf, "%u\n", channel->offermsg.monitorid); | |
1874 | } | |
1875 | static VMBUS_CHAN_ATTR(monitor_id, S_IRUGO, subchannel_monitor_id_show, NULL); | |
1876 | ||
14948e39 | 1877 | static ssize_t subchannel_id_show(struct vmbus_channel *channel, |
f0fa2974 SH |
1878 | char *buf) |
1879 | { | |
1880 | return sprintf(buf, "%u\n", | |
1881 | channel->offermsg.offer.sub_channel_index); | |
1882 | } | |
1883 | static VMBUS_CHAN_ATTR_RO(subchannel_id); | |
1884 | ||
/*
 * NULL-terminated list of the per-channel sysfs attributes; used as the
 * .attrs member of vmbus_chan_group.
 */
static struct attribute *vmbus_chan_attrs[] = {
	&chan_attr_out_mask.attr,
	&chan_attr_in_mask.attr,
	&chan_attr_read_avail.attr,
	&chan_attr_write_avail.attr,
	&chan_attr_cpu.attr,
	&chan_attr_pending.attr,
	&chan_attr_latency.attr,
	&chan_attr_interrupts.attr,
	&chan_attr_events.attr,
	&chan_attr_intr_in_full.attr,
	&chan_attr_intr_out_empty.attr,
	&chan_attr_out_full_first.attr,
	&chan_attr_out_full_total.attr,
	&chan_attr_monitor_id.attr,
	&chan_attr_subchannel_id.attr,
	NULL
};
1903 | ||
46fc1548 KB |
1904 | /* |
1905 | * Channel-level attribute_group callback function. Returns the permission for | |
1906 | * each attribute, and returns 0 if an attribute is not visible. | |
1907 | */ | |
1908 | static umode_t vmbus_chan_attr_is_visible(struct kobject *kobj, | |
1909 | struct attribute *attr, int idx) | |
1910 | { | |
1911 | const struct vmbus_channel *channel = | |
1912 | container_of(kobj, struct vmbus_channel, kobj); | |
1913 | ||
1914 | /* Hide the monitor attributes if the monitor mechanism is not used. */ | |
1915 | if (!channel->offermsg.monitor_allocated && | |
1916 | (attr == &chan_attr_pending.attr || | |
1917 | attr == &chan_attr_latency.attr || | |
1918 | attr == &chan_attr_monitor_id.attr)) | |
1919 | return 0; | |
1920 | ||
1921 | return attr->mode; | |
1922 | } | |
1923 | ||
/*
 * Attribute group of a channel kobject: the attributes listed in
 * vmbus_chan_attrs, filtered by vmbus_chan_attr_is_visible().
 */
static struct attribute_group vmbus_chan_group = {
	.attrs = vmbus_chan_attrs,
	.is_visible = vmbus_chan_attr_is_visible
};
1928 | ||
/*
 * kobj_type of channel kobjects: sysfs accesses go through
 * vmbus_chan_sysfs_ops, and vmbus_chan_release() is the release callback.
 */
static struct kobj_type vmbus_chan_ktype = {
	.sysfs_ops = &vmbus_chan_sysfs_ops,
	.release = vmbus_chan_release,
};
1933 | ||
1934 | /* | |
1935 | * vmbus_add_channel_kobj - setup a sub-directory under device/channels | |
1936 | */ | |
1937 | int vmbus_add_channel_kobj(struct hv_device *dev, struct vmbus_channel *channel) | |
1938 | { | |
46fc1548 | 1939 | const struct device *device = &dev->device; |
c2e5df61 SH |
1940 | struct kobject *kobj = &channel->kobj; |
1941 | u32 relid = channel->offermsg.child_relid; | |
1942 | int ret; | |
1943 | ||
1944 | kobj->kset = dev->channels_kset; | |
1945 | ret = kobject_init_and_add(kobj, &vmbus_chan_ktype, NULL, | |
1946 | "%u", relid); | |
1947 | if (ret) | |
1948 | return ret; | |
1949 | ||
46fc1548 KB |
1950 | ret = sysfs_create_group(kobj, &vmbus_chan_group); |
1951 | ||
1952 | if (ret) { | |
1953 | /* | |
1954 | * The calling functions' error handling paths will cleanup the | |
1955 | * empty channel directory. | |
1956 | */ | |
1957 | dev_err(device, "Unable to set up channel sysfs files\n"); | |
1958 | return ret; | |
1959 | } | |
1960 | ||
c2e5df61 SH |
1961 | kobject_uevent(kobj, KOBJ_ADD); |
1962 | ||
1963 | return 0; | |
1964 | } | |
1965 | ||
/*
 * vmbus_remove_channel_attr_group - remove the channel's attribute group
 *
 * Removes vmbus_chan_group (the group created by vmbus_add_channel_kobj())
 * from the channel's kobject.
 */
void vmbus_remove_channel_attr_group(struct vmbus_channel *channel)
{
	sysfs_remove_group(&channel->kobj, &vmbus_chan_group);
}
1973 | ||
3e189519 | 1974 | /* |
f2c73011 | 1975 | * vmbus_device_create - Creates and registers a new child device |
3e189519 | 1976 | * on the vmbus. |
90c9960e | 1977 | */ |
593db803 AS |
1978 | struct hv_device *vmbus_device_create(const guid_t *type, |
1979 | const guid_t *instance, | |
1b9d48f2 | 1980 | struct vmbus_channel *channel) |
3e7ee490 | 1981 | { |
3d3b5518 | 1982 | struct hv_device *child_device_obj; |
3e7ee490 | 1983 | |
6bad88da S |
1984 | child_device_obj = kzalloc(sizeof(struct hv_device), GFP_KERNEL); |
1985 | if (!child_device_obj) { | |
0a46618d | 1986 | pr_err("Unable to allocate device object for child device\n"); |
3e7ee490 HJ |
1987 | return NULL; |
1988 | } | |
1989 | ||
cae5b843 | 1990 | child_device_obj->channel = channel; |
593db803 AS |
1991 | guid_copy(&child_device_obj->dev_type, type); |
1992 | guid_copy(&child_device_obj->dev_instance, instance); | |
7047f17d | 1993 | child_device_obj->vendor_id = 0x1414; /* MSFT vendor ID */ |
3e7ee490 | 1994 | |
3e7ee490 HJ |
1995 | return child_device_obj; |
1996 | } | |
1997 | ||
3e189519 | 1998 | /* |
22794281 | 1999 | * vmbus_device_register - Register the child device |
90c9960e | 2000 | */ |
22794281 | 2001 | int vmbus_device_register(struct hv_device *child_device_obj) |
3e7ee490 | 2002 | { |
c2e5df61 SH |
2003 | struct kobject *kobj = &child_device_obj->device.kobj; |
2004 | int ret; | |
6bad88da | 2005 | |
f6b2db08 | 2006 | dev_set_name(&child_device_obj->device, "%pUl", |
458c4475 | 2007 | &child_device_obj->channel->offermsg.offer.if_instance); |
3e7ee490 | 2008 | |
0bce28b6 | 2009 | child_device_obj->device.bus = &hv_bus; |
607c1a11 | 2010 | child_device_obj->device.parent = &hv_acpi_dev->dev; |
6bad88da | 2011 | child_device_obj->device.release = vmbus_device_release; |
3e7ee490 | 2012 | |
90c9960e GKH |
2013 | /* |
2014 | * Register with the LDM. This will kick off the driver/device | |
2015 | * binding...which will eventually call vmbus_match() and vmbus_probe() | |
2016 | */ | |
6bad88da | 2017 | ret = device_register(&child_device_obj->device); |
c2e5df61 | 2018 | if (ret) { |
0a46618d | 2019 | pr_err("Unable to register child device\n"); |
c2e5df61 SH |
2020 | return ret; |
2021 | } | |
2022 | ||
2023 | child_device_obj->channels_kset = kset_create_and_add("channels", | |
2024 | NULL, kobj); | |
2025 | if (!child_device_obj->channels_kset) { | |
2026 | ret = -ENOMEM; | |
2027 | goto err_dev_unregister; | |
2028 | } | |
2029 | ||
2030 | ret = vmbus_add_channel_kobj(child_device_obj, | |
2031 | child_device_obj->channel); | |
2032 | if (ret) { | |
2033 | pr_err("Unable to register primary channeln"); | |
2034 | goto err_kset_unregister; | |
2035 | } | |
af9ca6f9 | 2036 | hv_debug_add_dev_dir(child_device_obj); |
c2e5df61 SH |
2037 | |
2038 | return 0; | |
2039 | ||
2040 | err_kset_unregister: | |
2041 | kset_unregister(child_device_obj->channels_kset); | |
3e7ee490 | 2042 | |
c2e5df61 SH |
2043 | err_dev_unregister: |
2044 | device_unregister(&child_device_obj->device); | |
3e7ee490 HJ |
2045 | return ret; |
2046 | } | |
2047 | ||
/*
 * vmbus_device_unregister - Remove the specified child device
 * from the vmbus.
 *
 * Unregisters the device's "channels" kset first and the device itself
 * afterwards.
 */
void vmbus_device_unregister(struct hv_device *device_obj)
{
	pr_debug("child device %s unregistered\n",
		dev_name(&device_obj->device));

	kset_unregister(device_obj->channels_kset);

	/*
	 * Kick off the process of unregistering the device.
	 * This will call vmbus_remove() and eventually vmbus_device_release()
	 */
	device_unregister(&device_obj->device);
}
2065 | ||
3e7ee490 | 2066 | |
/*
 * VMBUS is an ACPI enumerated device. Get the information we
 * need from DSDT.
 *
 * vmbus_walk_resources() is the per-resource callback handed to
 * acpi_walk_resources(). Address32/Address64 descriptors are recorded as
 * "hyperv mmio" resources in the hyperv_mmio sibling list, which is kept in
 * ascending address order with adjacent ranges merged. An extended IRQ
 * descriptor sets vmbus_interrupt and vmbus_irq. Every other resource type
 * is ignored.
 *
 * Returns AE_OK, AE_ERROR if the interrupt descriptor cannot be parsed, or
 * AE_NO_MEMORY if the resource node cannot be allocated.
 */
#define VTPM_BASE_ADDRESS 0xfed40000
static acpi_status vmbus_walk_resources(struct acpi_resource *res, void *ctx)
{
	resource_size_t start = 0;
	resource_size_t end = 0;
	struct resource *new_res;
	struct resource **old_res = &hyperv_mmio;
	struct resource **prev_res = NULL;
	struct resource r;

	switch (res->type) {

	/*
	 * "Address" descriptors are for bus windows. Ignore
	 * "memory" descriptors, which are for registers on
	 * devices.
	 */
	case ACPI_RESOURCE_TYPE_ADDRESS32:
		start = res->data.address32.address.minimum;
		end = res->data.address32.address.maximum;
		break;

	case ACPI_RESOURCE_TYPE_ADDRESS64:
		start = res->data.address64.address.minimum;
		end = res->data.address64.address.maximum;
		break;

	/*
	 * The IRQ information is needed only on ARM64, which Hyper-V
	 * sets up in the extended format. IRQ information is present
	 * on x86/x64 in the non-extended format but it is not used by
	 * Linux. So don't bother checking for the non-extended format.
	 */
	case ACPI_RESOURCE_TYPE_EXTENDED_IRQ:
		if (!acpi_dev_resource_interrupt(res, 0, &r)) {
			pr_err("Unable to parse Hyper-V ACPI interrupt\n");
			return AE_ERROR;
		}
		/* ARM64 INTID for VMbus */
		vmbus_interrupt = res->data.extended_irq.interrupts[0];
		/* Linux IRQ number */
		vmbus_irq = r.start;
		return AE_OK;

	default:
		/* Unused resource type */
		return AE_OK;

	}
	/*
	 * Ignore ranges that are below 1MB, as they're not
	 * necessary or useful here.
	 */
	if (end < 0x100000)
		return AE_OK;

	new_res = kzalloc(sizeof(*new_res), GFP_ATOMIC);
	if (!new_res)
		return AE_NO_MEMORY;

	/* If this range overlaps the virtual TPM, truncate it. */
	if (end > VTPM_BASE_ADDRESS && start < VTPM_BASE_ADDRESS)
		end = VTPM_BASE_ADDRESS;

	new_res->name = "hyperv mmio";
	new_res->flags = IORESOURCE_MEM;
	new_res->start = start;
	new_res->end = end;

	/*
	 * Walk the list to find the place for the new range.
	 * If two ranges are adjacent, merge them.
	 */
	do {
		/* Reached the end of the list: append the new node. */
		if (!*old_res) {
			*old_res = new_res;
			break;
		}

		/* New range starts right after this one: extend it upwards. */
		if (((*old_res)->end + 1) == new_res->start) {
			(*old_res)->end = new_res->end;
			kfree(new_res);
			break;
		}

		/* New range ends right before this one: extend it downwards. */
		if ((*old_res)->start == new_res->end + 1) {
			(*old_res)->start = new_res->start;
			kfree(new_res);
			break;
		}

		/* New range lies entirely below this one: insert before it. */
		if ((*old_res)->start > new_res->end) {
			new_res->sibling = *old_res;
			if (prev_res)
				(*prev_res)->sibling = new_res;
			*old_res = new_res;
			break;
		}

		/* Advance to the next sibling, remembering the previous link. */
		prev_res = old_res;
		old_res = &(*old_res)->sibling;

	} while (1);

	return AE_OK;
}
2176 | ||
7f163a6f JO |
2177 | static int vmbus_acpi_remove(struct acpi_device *device) |
2178 | { | |
2179 | struct resource *cur_res; | |
2180 | struct resource *next_res; | |
2181 | ||
2182 | if (hyperv_mmio) { | |
6d146aef JO |
2183 | if (fb_mmio) { |
2184 | __release_region(hyperv_mmio, fb_mmio->start, | |
2185 | resource_size(fb_mmio)); | |
2186 | fb_mmio = NULL; | |
2187 | } | |
2188 | ||
7f163a6f JO |
2189 | for (cur_res = hyperv_mmio; cur_res; cur_res = next_res) { |
2190 | next_res = cur_res->sibling; | |
2191 | kfree(cur_res); | |
2192 | } | |
2193 | } | |
2194 | ||
2195 | return 0; | |
2196 | } | |
2197 | ||
6d146aef JO |
2198 | static void vmbus_reserve_fb(void) |
2199 | { | |
2200 | int size; | |
2201 | /* | |
2202 | * Make a claim for the frame buffer in the resource tree under the | |
2203 | * first node, which will be the one below 4GB. The length seems to | |
2204 | * be underreported, particularly in a Generation 1 VM. So start out | |
2205 | * reserving a larger area and make it smaller until it succeeds. | |
2206 | */ | |
2207 | ||
2208 | if (screen_info.lfb_base) { | |
2209 | if (efi_enabled(EFI_BOOT)) | |
2210 | size = max_t(__u32, screen_info.lfb_size, 0x800000); | |
2211 | else | |
2212 | size = max_t(__u32, screen_info.lfb_size, 0x4000000); | |
2213 | ||
2214 | for (; !fb_mmio && (size >= 0x100000); size >>= 1) { | |
2215 | fb_mmio = __request_region(hyperv_mmio, | |
2216 | screen_info.lfb_base, size, | |
2217 | fb_mmio_name, 0); | |
2218 | } | |
2219 | } | |
2220 | } | |
2221 | ||
/**
 * vmbus_allocate_mmio() - Pick a memory-mapped I/O range.
 * @new:		If successful, supplies a pointer to the
 *			allocated MMIO space.
 * @device_obj:		Identifies the caller
 * @min:		Minimum guest physical address of the
 *			allocation
 * @max:		Maximum guest physical address
 * @size:		Size of the range to be allocated
 * @align:		Alignment of the range to be allocated
 * @fb_overlap_ok:	Whether this allocation can be allowed
 *			to overlap the video frame buffer.
 *
 * This function walks the resources granted to VMBus by the
 * _CRS object in the ACPI namespace underneath the parent
 * "bridge" whether that's a root PCI bus in the Generation 1
 * case or a Module Device in the Generation 2 case.  It then
 * attempts to allocate from the global MMIO pool in a way that
 * matches the constraints supplied in these parameters and by
 * that _CRS.
 *
 * The whole search runs with hyperv_mmio_lock held.
 *
 * Return: 0 on success, -errno on failure (-ENXIO when no suitable
 * range could be claimed)
 */
int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj,
			resource_size_t min, resource_size_t max,
			resource_size_t size, resource_size_t align,
			bool fb_overlap_ok)
{
	struct resource *iter, *shadow;
	resource_size_t range_min, range_max, start;
	const char *dev_n = dev_name(&device_obj->device);
	int retval;

	retval = -ENXIO;
	mutex_lock(&hyperv_mmio_lock);

	/*
	 * If overlaps with frame buffers are allowed, then first attempt to
	 * make the allocation from within the reserved region.  Because it
	 * is already reserved, no shadow allocation is necessary.
	 */
	if (fb_overlap_ok && fb_mmio && !(min > fb_mmio->end) &&
	    !(max < fb_mmio->start)) {

		range_min = fb_mmio->start;
		range_max = fb_mmio->end;
		/* Round the first candidate up to the requested alignment. */
		start = (range_min + align - 1) & ~(align - 1);
		for (; start + size - 1 <= range_max; start += align) {
			*new = request_mem_region_exclusive(start, size, dev_n);
			if (*new) {
				retval = 0;
				goto exit;
			}
		}
	}

	for (iter = hyperv_mmio; iter; iter = iter->sibling) {
		/* Skip ranges that do not intersect the [min, max] window. */
		if ((iter->start >= max) || (iter->end <= min))
			continue;

		range_min = iter->start;
		range_max = iter->end;
		start = (range_min + align - 1) & ~(align - 1);
		for (; start + size - 1 <= range_max; start += align) {
			/* Claim a shadow region under this hyperv_mmio node. */
			shadow = __request_region(iter, start, size, NULL,
						  IORESOURCE_BUSY);
			if (!shadow)
				continue;

			*new = request_mem_region_exclusive(start, size, dev_n);
			if (*new) {
				/* The shadow's name field records the new resource. */
				shadow->name = (char *)*new;
				retval = 0;
				goto exit;
			}

			/* Second claim failed: drop the shadow and try the next slot. */
			__release_region(iter, start, size);
		}
	}

exit:
	mutex_unlock(&hyperv_mmio_lock);
	return retval;
}
EXPORT_SYMBOL_GPL(vmbus_allocate_mmio);
2307 | ||
97fb77dc JO |
2308 | /** |
2309 | * vmbus_free_mmio() - Free a memory-mapped I/O range. | |
2310 | * @start: Base address of region to release. | |
2311 | * @size: Size of the range to be allocated | |
2312 | * | |
2313 | * This function releases anything requested by | |
2314 | * vmbus_mmio_allocate(). | |
2315 | */ | |
2316 | void vmbus_free_mmio(resource_size_t start, resource_size_t size) | |
2317 | { | |
be000f93 JO |
2318 | struct resource *iter; |
2319 | ||
8aea7f82 | 2320 | mutex_lock(&hyperv_mmio_lock); |
be000f93 JO |
2321 | for (iter = hyperv_mmio; iter; iter = iter->sibling) { |
2322 | if ((iter->start >= start + size) || (iter->end <= start)) | |
2323 | continue; | |
2324 | ||
2325 | __release_region(iter, start, size); | |
2326 | } | |
97fb77dc | 2327 | release_mem_region(start, size); |
8aea7f82 | 2328 | mutex_unlock(&hyperv_mmio_lock); |
97fb77dc JO |
2329 | |
2330 | } | |
2331 | EXPORT_SYMBOL_GPL(vmbus_free_mmio); | |
2332 | ||
b0069f43 S |
2333 | static int vmbus_acpi_add(struct acpi_device *device) |
2334 | { | |
2335 | acpi_status result; | |
90f34535 | 2336 | int ret_val = -ENODEV; |
7f163a6f | 2337 | struct acpi_device *ancestor; |
b0069f43 | 2338 | |
607c1a11 S |
2339 | hv_acpi_dev = device; |
2340 | ||
0a4425b6 | 2341 | result = acpi_walk_resources(device->handle, METHOD_NAME__CRS, |
90f34535 | 2342 | vmbus_walk_resources, NULL); |
b0069f43 | 2343 | |
90f34535 S |
2344 | if (ACPI_FAILURE(result)) |
2345 | goto acpi_walk_err; | |
2346 | /* | |
7f163a6f JO |
2347 | * Some ancestor of the vmbus acpi device (Gen1 or Gen2 |
2348 | * firmware) is the VMOD that has the mmio ranges. Get that. | |
90f34535 | 2349 | */ |
7f163a6f JO |
2350 | for (ancestor = device->parent; ancestor; ancestor = ancestor->parent) { |
2351 | result = acpi_walk_resources(ancestor->handle, METHOD_NAME__CRS, | |
2352 | vmbus_walk_resources, NULL); | |
90f34535 S |
2353 | |
2354 | if (ACPI_FAILURE(result)) | |
7f163a6f | 2355 | continue; |
6d146aef JO |
2356 | if (hyperv_mmio) { |
2357 | vmbus_reserve_fb(); | |
7f163a6f | 2358 | break; |
6d146aef | 2359 | } |
b0069f43 | 2360 | } |
90f34535 S |
2361 | ret_val = 0; |
2362 | ||
2363 | acpi_walk_err: | |
b0069f43 | 2364 | complete(&probe_event); |
7f163a6f JO |
2365 | if (ret_val) |
2366 | vmbus_acpi_remove(device); | |
90f34535 | 2367 | return ret_val; |
b0069f43 S |
2368 | } |
2369 | ||
83b50f83 | 2370 | #ifdef CONFIG_PM_SLEEP |
/*
 * vmbus_bus_suspend - bus-level suspend callback.
 *
 * Waits for in-progress channel offers, forces every hv_sock channel to be
 * rescinded, waits for the channels that must be closed on suspend,
 * invalidates the relid of every channel on the channel list and finally
 * initiates the unload of the VMBus connection.
 *
 * Returns 0 on success, or -EBUSY if channels are still awaiting fix-up
 * from a previous resume.
 */
static int vmbus_bus_suspend(struct device *dev)
{
	struct vmbus_channel *channel, *sc;

	while (atomic_read(&vmbus_connection.offer_in_progress) != 0) {
		/*
		 * We wait here until the completion of any channel
		 * offers that are currently in progress.
		 */
		msleep(1);
	}

	/* Force-rescind the hv_sock channels; other channels are skipped. */
	mutex_lock(&vmbus_connection.channel_mutex);
	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
		if (!is_hvsock_channel(channel))
			continue;

		vmbus_force_channel_rescinded(channel);
	}
	mutex_unlock(&vmbus_connection.channel_mutex);

	/*
	 * Wait until all the sub-channels and hv_sock channels have been
	 * cleaned up. Sub-channels should be destroyed upon suspend, otherwise
	 * they would conflict with the new sub-channels that will be created
	 * in the resume path. hv_sock channels should also be destroyed, but
	 * a hv_sock channel of an established hv_sock connection can not be
	 * really destroyed since it may still be referenced by the userspace
	 * application, so we just force the hv_sock channel to be rescinded
	 * by vmbus_force_channel_rescinded(), and the userspace application
	 * will thoroughly destroy the channel after hibernation.
	 *
	 * Note: the counter nr_chan_close_on_suspend may never go above 0 if
	 * the VM has no sub-channel and hv_sock channel, e.g. a 1-vCPU VM.
	 */
	if (atomic_read(&vmbus_connection.nr_chan_close_on_suspend) > 0)
		wait_for_completion(&vmbus_connection.ready_for_suspend_event);

	if (atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) != 0) {
		pr_err("Can not suspend due to a previous failed resuming\n");
		return -EBUSY;
	}

	mutex_lock(&vmbus_connection.channel_mutex);

	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
		/*
		 * Remove the channel from the array of channels and invalidate
		 * the channel's relid.  Upon resume, vmbus_onoffer() will fix
		 * up the relid (and other fields, if necessary) and add the
		 * channel back to the array.
		 */
		vmbus_channel_unmap_relid(channel);
		channel->offermsg.child_relid = INVALID_RELID;

		/* hv_sock channels are not counted for fix-up on resume. */
		if (is_hvsock_channel(channel)) {
			if (!channel->rescind) {
				pr_err("hv_sock channel not rescinded!\n");
				WARN_ON_ONCE(1);
			}
			continue;
		}

		/* Any sub-channel still on the list at this point is reported. */
		list_for_each_entry(sc, &channel->sc_list, sc_list) {
			pr_err("Sub-channel not deleted!\n");
			WARN_ON_ONCE(1);
		}

		atomic_inc(&vmbus_connection.nr_chan_fixup_on_resume);
	}

	mutex_unlock(&vmbus_connection.channel_mutex);

	vmbus_initiate_unload(false);

	/* Reset the event for the next resume. */
	reinit_completion(&vmbus_connection.ready_for_resume_event);

	return 0;
}
2451 | ||
2452 | static int vmbus_bus_resume(struct device *dev) | |
2453 | { | |
2454 | struct vmbus_channel_msginfo *msginfo; | |
2455 | size_t msgsize; | |
2456 | int ret; | |
2457 | ||
2458 | /* | |
2459 | * We only use the 'vmbus_proto_version', which was in use before | |
2460 | * hibernation, to re-negotiate with the host. | |
2461 | */ | |
bedc61a9 | 2462 | if (!vmbus_proto_version) { |
f53335e3 DC |
2463 | pr_err("Invalid proto version = 0x%x\n", vmbus_proto_version); |
2464 | return -EINVAL; | |
2465 | } | |
2466 | ||
2467 | msgsize = sizeof(*msginfo) + | |
2468 | sizeof(struct vmbus_channel_initiate_contact); | |
2469 | ||
2470 | msginfo = kzalloc(msgsize, GFP_KERNEL); | |
2471 | ||
2472 | if (msginfo == NULL) | |
2473 | return -ENOMEM; | |
2474 | ||
2475 | ret = vmbus_negotiate_version(msginfo, vmbus_proto_version); | |
2476 | ||
2477 | kfree(msginfo); | |
2478 | ||
2479 | if (ret != 0) | |
2480 | return ret; | |
2481 | ||
d8bd2d44 DC |
2482 | WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) == 0); |
2483 | ||
f53335e3 DC |
2484 | vmbus_request_offers(); |
2485 | ||
19873eec DC |
2486 | if (wait_for_completion_timeout( |
2487 | &vmbus_connection.ready_for_resume_event, 10 * HZ) == 0) | |
2488 | pr_err("Some vmbus device is missing after suspending?\n"); | |
d8bd2d44 | 2489 | |
b307b389 DC |
2490 | /* Reset the event for the next suspend. */ |
2491 | reinit_completion(&vmbus_connection.ready_for_suspend_event); | |
2492 | ||
f53335e3 DC |
2493 | return 0; |
2494 | } | |
1a06d017 DC |
2495 | #else |
2496 | #define vmbus_bus_suspend NULL | |
2497 | #define vmbus_bus_resume NULL | |
83b50f83 | 2498 | #endif /* CONFIG_PM_SLEEP */ |
f53335e3 | 2499 | |
b0069f43 S |
2500 | static const struct acpi_device_id vmbus_acpi_device_ids[] = { |
2501 | {"VMBUS", 0}, | |
9d7b18d1 | 2502 | {"VMBus", 0}, |
b0069f43 S |
2503 | {"", 0}, |
2504 | }; | |
2505 | MODULE_DEVICE_TABLE(acpi, vmbus_acpi_device_ids); | |
2506 | ||
f53335e3 | 2507 | /* |
1a06d017 DC |
2508 | * Note: we must use the "no_irq" ops, otherwise hibernation can not work with |
2509 | * PCI device assignment, because "pci_dev_pm_ops" uses the "noirq" ops: in | |
2510 | * the resume path, the pci "noirq" restore op runs before "non-noirq" op (see | |
f53335e3 DC |
2511 | * resume_target_kernel() -> dpm_resume_start(), and hibernation_restore() -> |
2512 | * dpm_resume_end()). This means vmbus_bus_resume() and the pci-hyperv's | |
1a06d017 DC |
2513 | * resume callback must also run via the "noirq" ops. |
2514 | * | |
2515 | * Set suspend_noirq/resume_noirq to NULL for Suspend-to-Idle: see the comment | |
2516 | * earlier in this file before vmbus_pm. | |
f53335e3 | 2517 | */ |
1a06d017 | 2518 | |
f53335e3 | 2519 | static const struct dev_pm_ops vmbus_bus_pm = { |
1a06d017 DC |
2520 | .suspend_noirq = NULL, |
2521 | .resume_noirq = NULL, | |
2522 | .freeze_noirq = vmbus_bus_suspend, | |
2523 | .thaw_noirq = vmbus_bus_resume, | |
2524 | .poweroff_noirq = vmbus_bus_suspend, | |
2525 | .restore_noirq = vmbus_bus_resume | |
f53335e3 DC |
2526 | }; |
2527 | ||
b0069f43 S |
2528 | static struct acpi_driver vmbus_acpi_driver = { |
2529 | .name = "vmbus", | |
2530 | .ids = vmbus_acpi_device_ids, | |
2531 | .ops = { | |
2532 | .add = vmbus_acpi_add, | |
e4ecb41c | 2533 | .remove = vmbus_acpi_remove, |
b0069f43 | 2534 | }, |
f53335e3 | 2535 | .drv.pm = &vmbus_bus_pm, |
b0069f43 S |
2536 | }; |
2537 | ||
2517281d VK |
2538 | static void hv_kexec_handler(void) |
2539 | { | |
fd1fea68 | 2540 | hv_stimer_global_cleanup(); |
75ff3a8a | 2541 | vmbus_initiate_unload(false); |
523b9408 VK |
2542 | /* Make sure conn_state is set as hv_synic_cleanup checks for it */ |
2543 | mb(); | |
76d36ab7 | 2544 | cpuhp_remove_state(hyperv_cpuhp_online); |
d6f3609d | 2545 | hyperv_cleanup(); |
2517281d VK |
2546 | }; |
2547 | ||
b4370df2 VK |
2548 | static void hv_crash_handler(struct pt_regs *regs) |
2549 | { | |
fd1fea68 MK |
2550 | int cpu; |
2551 | ||
75ff3a8a | 2552 | vmbus_initiate_unload(true); |
b4370df2 VK |
2553 | /* |
2554 | * In crash handler we can't schedule synic cleanup for all CPUs, | |
2555 | * doing the cleanup for current CPU only. This should be sufficient | |
2556 | * for kdump. | |
2557 | */ | |
fd1fea68 MK |
2558 | cpu = smp_processor_id(); |
2559 | hv_stimer_cleanup(cpu); | |
7a1323b5 | 2560 | hv_synic_disable_regs(cpu); |
d6f3609d | 2561 | hyperv_cleanup(); |
b4370df2 VK |
2562 | }; |
2563 | ||
63ecc6d2 DC |
/*
 * hv_synic_suspend - syscore suspend callback: disable the SynIC regs on CPU0
 *
 * Return: always 0.
 */
static int hv_synic_suspend(void)
{
	/*
	 * By the time this runs every non-boot CPU has been offlined. In a
	 * legacy configuration without stimer Direct Mode, the stimers of
	 * those CPUs were already unbound via hv_synic_cleanup() ->
	 * hv_stimer_legacy_cleanup() -> hv_stimer_cleanup() ->
	 * clockevents_unbind_device().
	 *
	 * This callback runs only on CPU0 with interrupts disabled, and
	 * hv_stimer_legacy_cleanup() is deliberately NOT called for CPU0:
	 *
	 * 1) It is unnecessary: interrupts stay disabled between
	 *    syscore_suspend() and syscore_resume(); see create_image() and
	 *    resume_target_kernel().
	 * 2) CPU0's stimer is disabled automatically later on by
	 *    syscore_suspend() -> timekeeping_suspend() -> tick_suspend()
	 *    -> ... -> clockevents_shutdown() -> ... -> hv_ce_shutdown().
	 * 3) clockevents_unbind_device() may sleep, so calling it in this
	 *    interrupts-disabled context would trigger a warning.
	 */
	hv_synic_disable_regs(0);

	return 0;
}
2590 | ||
/*
 * hv_synic_resume - syscore resume callback: re-enable the SynIC regs on CPU0
 */
static void hv_synic_resume(void)
{
	hv_synic_enable_regs(0);

	/*
	 * hv_stimer_init(0) is not needed here: hv_synic_suspend() never
	 * unbinds the CPU0 timer, and timekeeping_resume() re-enables it
	 * automatically.
	 */
}
2601 | ||
2602 | /* The callbacks run only on CPU0, with irqs_disabled. */ | |
2603 | static struct syscore_ops hv_synic_syscore_ops = { | |
2604 | .suspend = hv_synic_suspend, | |
2605 | .resume = hv_synic_resume, | |
2606 | }; | |
2607 | ||
607c1a11 | 2608 | static int __init hv_acpi_init(void) |
1168ac22 | 2609 | { |
2dda95f8 | 2610 | int ret, t; |
b0069f43 | 2611 | |
4a5f3cde | 2612 | if (!hv_is_hyperv_initialized()) |
0592969e JW |
2613 | return -ENODEV; |
2614 | ||
b0069f43 S |
2615 | init_completion(&probe_event); |
2616 | ||
2617 | /* | |
efc26722 | 2618 | * Get ACPI resources first. |
b0069f43 | 2619 | */ |
0246604c S |
2620 | ret = acpi_bus_register_driver(&vmbus_acpi_driver); |
2621 | ||
b0069f43 S |
2622 | if (ret) |
2623 | return ret; | |
2624 | ||
2dda95f8 S |
2625 | t = wait_for_completion_timeout(&probe_event, 5*HZ); |
2626 | if (t == 0) { | |
2627 | ret = -ETIMEDOUT; | |
2628 | goto cleanup; | |
2629 | } | |
af9ca6f9 | 2630 | hv_debug_init(); |
b0069f43 | 2631 | |
efc26722 | 2632 | ret = vmbus_bus_init(); |
91fd799e | 2633 | if (ret) |
2dda95f8 S |
2634 | goto cleanup; |
2635 | ||
2517281d | 2636 | hv_setup_kexec_handler(hv_kexec_handler); |
b4370df2 | 2637 | hv_setup_crash_handler(hv_crash_handler); |
2517281d | 2638 | |
63ecc6d2 DC |
2639 | register_syscore_ops(&hv_synic_syscore_ops); |
2640 | ||
2dda95f8 S |
2641 | return 0; |
2642 | ||
2643 | cleanup: | |
2644 | acpi_bus_unregister_driver(&vmbus_acpi_driver); | |
cf6a2eac | 2645 | hv_acpi_dev = NULL; |
91fd799e | 2646 | return ret; |
1168ac22 S |
2647 | } |
2648 | ||
93e5bd06 S |
2649 | static void __exit vmbus_exit(void) |
2650 | { | |
e72e7ac5 VK |
2651 | int cpu; |
2652 | ||
63ecc6d2 DC |
2653 | unregister_syscore_ops(&hv_synic_syscore_ops); |
2654 | ||
2517281d | 2655 | hv_remove_kexec_handler(); |
b4370df2 | 2656 | hv_remove_crash_handler(); |
09a19628 | 2657 | vmbus_connection.conn_state = DISCONNECTED; |
fd1fea68 | 2658 | hv_stimer_global_cleanup(); |
2db84eff | 2659 | vmbus_disconnect(); |
76d388cd | 2660 | hv_remove_vmbus_irq(); |
37cdd991 SH |
2661 | for_each_online_cpu(cpu) { |
2662 | struct hv_per_cpu_context *hv_cpu | |
2663 | = per_cpu_ptr(hv_context.cpu_context, cpu); | |
2664 | ||
2665 | tasklet_kill(&hv_cpu->msg_dpc); | |
2666 | } | |
af9ca6f9 BB |
2667 | hv_debug_rm_all_dir(); |
2668 | ||
93e5bd06 | 2669 | vmbus_free_channels(); |
8b6a877c | 2670 | kfree(vmbus_connection.channels); |
37cdd991 | 2671 | |
cc2dd402 | 2672 | if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { |
81b18bce | 2673 | kmsg_dump_unregister(&hv_kmsg_dumper); |
510f7aef | 2674 | unregister_die_notifier(&hyperv_die_block); |
096c605f VK |
2675 | atomic_notifier_chain_unregister(&panic_notifier_list, |
2676 | &hyperv_panic_block); | |
2677 | } | |
81b18bce SM |
2678 | |
2679 | free_page((unsigned long)hv_panic_page); | |
8afc06dd SM |
2680 | unregister_sysctl_table(hv_ctl_table_hdr); |
2681 | hv_ctl_table_hdr = NULL; | |
93e5bd06 | 2682 | bus_unregister(&hv_bus); |
37cdd991 | 2683 | |
76d36ab7 | 2684 | cpuhp_remove_state(hyperv_cpuhp_online); |
06210b42 | 2685 | hv_synic_free(); |
93e5bd06 S |
2686 | acpi_bus_unregister_driver(&vmbus_acpi_driver); |
2687 | } | |
2688 | ||
1168ac22 | 2689 | |
90c9960e | 2690 | MODULE_LICENSE("GPL"); |
674eecb3 | 2691 | MODULE_DESCRIPTION("Microsoft Hyper-V VMBus Driver"); |
3e7ee490 | 2692 | |
43d4e119 | 2693 | subsys_initcall(hv_acpi_init); |
93e5bd06 | 2694 | module_exit(vmbus_exit); |