]> git.proxmox.com Git - ceph.git/blame - ceph/src/spdk/test/lib/nvme/aer/aer.c
bump version to 12.2.12-pve1
[ceph.git] / ceph / src / spdk / test / lib / nvme / aer / aer.c
CommitLineData
7c673cae
FG
1/*-
2 * BSD LICENSE
3 *
4 * Copyright (c) Intel Corporation.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34#include <inttypes.h>
35#include <stdbool.h>
36#include <stdio.h>
37#include <stdlib.h>
38#include <unistd.h>
39#include <string.h>
40
41#include "spdk/log.h"
42#include "spdk/nvme.h"
43#include "spdk/env.h"
44
/* Upper bound on the number of controllers that can be tracked in devs[]. */
#define MAX_DEVS 64

/* Per-controller test state; one entry is filled in for every attached controller. */
struct dev {
	/* Controller handle received in attach_cb(). */
	struct spdk_nvme_ctrlr				*ctrlr;
	/* Buffer that receives the health information log page. */
	struct spdk_nvme_health_information_page	*health_page;
	/* Temperature threshold read from the controller before the test changes it. */
	uint32_t					orig_temp_threshold;
	/* Printable identifier, copied from the transport address. */
	char						name[100];
};

/* Number of Get Features commands get_feature_test() submits per controller. */
#define ADMINQ_SIZE 128

/* Table of attached controllers; only the first num_devs entries are in use. */
static struct dev devs[MAX_DEVS];
static int num_devs = 0;

/* Counters advanced by the completion callbacks and polled by main(). */
static int aer_done = 0;
static int get_queues_done = 0;

/*
 * Iterate over every in-use entry of devs[].
 * `iter` must be an lvalue of type struct dev *; it is parenthesized so that
 * any lvalue expression can be passed safely.
 */
#define foreach_dev(iter) \
	for ((iter) = devs; (iter) - devs < num_devs; (iter)++)
64
65
66static int temperature_done = 0;
67static int failed = 0;
68static struct spdk_nvme_transport_id g_trid;
69
70static void set_feature_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl)
71{
72 struct dev *dev = cb_arg;
73
74 if (spdk_nvme_cpl_is_error(cpl)) {
75 printf("%s: set feature (temp threshold) failed\n", dev->name);
76 failed = 1;
77 return;
78 }
79
80 /* Admin command completions are synchronized by the NVMe driver,
81 * so we don't need to do any special locking here. */
82 temperature_done++;
83}
84
85static int
86set_temp_threshold(struct dev *dev, uint32_t temp)
87{
88 struct spdk_nvme_cmd cmd = {};
89
90 cmd.opc = SPDK_NVME_OPC_SET_FEATURES;
91 cmd.cdw10 = SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD;
92 cmd.cdw11 = temp;
93
94 return spdk_nvme_ctrlr_cmd_admin_raw(dev->ctrlr, &cmd, NULL, 0, set_feature_completion, dev);
95}
96
97static void
98get_feature_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl)
99{
100 struct dev *dev = cb_arg;
101
102 if (spdk_nvme_cpl_is_error(cpl)) {
103 printf("%s: get feature (temp threshold) failed\n", dev->name);
104 failed = 1;
105 return;
106 }
107
108 dev->orig_temp_threshold = cpl->cdw0;
109 printf("%s: original temperature threshold: %u Kelvin (%d Celsius)\n",
110 dev->name, dev->orig_temp_threshold, dev->orig_temp_threshold - 273);
111
112 temperature_done++;
113}
114
115static int
116get_temp_threshold(struct dev *dev)
117{
118 struct spdk_nvme_cmd cmd = {};
119
120 cmd.opc = SPDK_NVME_OPC_GET_FEATURES;
121 cmd.cdw10 = SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD;
122
123 return spdk_nvme_ctrlr_cmd_admin_raw(dev->ctrlr, &cmd, NULL, 0, get_feature_completion, dev);
124}
125
126static void
127print_health_page(struct dev *dev, struct spdk_nvme_health_information_page *hip)
128{
129 printf("%s: Current Temperature: %u Kelvin (%d Celsius)\n",
130 dev->name, hip->temperature, hip->temperature - 273);
131}
132
133static void
134get_log_page_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl)
135{
136 struct dev *dev = cb_arg;
137
138 if (spdk_nvme_cpl_is_error(cpl)) {
139 printf("%s: get log page failed\n", dev->name);
140 failed = 1;
141 return;
142 }
143
144 print_health_page(dev, dev->health_page);
145 aer_done++;
146}
147
148static int
149get_health_log_page(struct dev *dev)
150{
151 return spdk_nvme_ctrlr_cmd_get_log_page(dev->ctrlr, SPDK_NVME_LOG_HEALTH_INFORMATION,
152 SPDK_NVME_GLOBAL_NS_TAG, dev->health_page, sizeof(*dev->health_page), 0,
153 get_log_page_completion, dev);
154}
155
156static void
157cleanup(void)
158{
159 struct dev *dev;
160
161 foreach_dev(dev) {
162 if (dev->health_page) {
163 spdk_free(dev->health_page);
164 }
165 }
166}
167
168static void aer_cb(void *arg, const struct spdk_nvme_cpl *cpl)
169{
170 uint32_t log_page_id = (cpl->cdw0 & 0xFF0000) >> 16;
171 struct dev *dev = arg;
172
173 if (spdk_nvme_cpl_is_error(cpl)) {
174 printf("%s: AER failed\n", dev->name);
175 failed = 1;
176 return;
177 }
178
179 printf("%s: aer_cb for log page %d\n", dev->name, log_page_id);
180
181 /* Set the temperature threshold back to the original value
182 * so the AER doesn't trigger again.
183 */
184 set_temp_threshold(dev, dev->orig_temp_threshold);
185
186 get_health_log_page(dev);
187}
188
189static void
190usage(const char *program_name)
191{
192 printf("%s [options]", program_name);
193 printf("\n");
194 printf("options:\n");
195 printf(" -r trid remote NVMe over Fabrics target address\n");
196 printf(" Format: 'key:value [key:value] ...'\n");
197 printf(" Keys:\n");
198 printf(" trtype Transport type (e.g. RDMA)\n");
199 printf(" adrfam Address family (e.g. IPv4, IPv6)\n");
200 printf(" traddr Transport address (e.g. 192.168.100.8)\n");
201 printf(" trsvcid Transport service identifier (e.g. 4420)\n");
202 printf(" subnqn Subsystem NQN (default: %s)\n", SPDK_NVMF_DISCOVERY_NQN);
203 printf(" Example: -r 'trtype:RDMA adrfam:IPv4 traddr:192.168.100.8 trsvcid:4420'\n");
204
205 spdk_tracelog_usage(stdout, "-t");
206
207 printf(" -v verbose (enable warnings)\n");
208 printf(" -H show this usage\n");
209}
210
211static int
212parse_args(int argc, char **argv)
213{
214 int op, rc;
215
216 g_trid.trtype = SPDK_NVME_TRANSPORT_PCIE;
217 snprintf(g_trid.subnqn, sizeof(g_trid.subnqn), "%s", SPDK_NVMF_DISCOVERY_NQN);
218
219 while ((op = getopt(argc, argv, "r:t:H")) != -1) {
220 switch (op) {
221 case 't':
222 rc = spdk_log_set_trace_flag(optarg);
223 if (rc < 0) {
224 fprintf(stderr, "unknown flag\n");
225 usage(argv[0]);
226 exit(EXIT_FAILURE);
227 }
228#ifndef DEBUG
229 fprintf(stderr, "%s must be rebuilt with CONFIG_DEBUG=y for -t flag.\n",
230 argv[0]);
231 usage(argv[0]);
232 return 0;
233#endif
234 break;
235 case 'r':
236 if (spdk_nvme_transport_id_parse(&g_trid, optarg) != 0) {
237 fprintf(stderr, "Error parsing transport address\n");
238 return 1;
239 }
240 break;
241 case 'H':
242 default:
243 usage(argv[0]);
244 return 1;
245 }
246 }
247
248 optind = 1;
249
250 return 0;
251}
252
253static bool
254probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
255 struct spdk_nvme_ctrlr_opts *opts)
256{
257 printf("Attaching to %s\n", trid->traddr);
258
259 return true;
260}
261
262static void
263attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
264 struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
265{
266 struct dev *dev;
267
268 /* add to dev list */
269 dev = &devs[num_devs++];
270
271 dev->ctrlr = ctrlr;
272
273 snprintf(dev->name, sizeof(dev->name), "%s",
274 trid->traddr);
275
276 printf("Attached to %s\n", dev->name);
277
278 dev->health_page = spdk_zmalloc(sizeof(*dev->health_page), 4096, NULL);
279 if (dev->health_page == NULL) {
280 printf("Allocation error (health page)\n");
281 failed = 1;
282 }
283}
284
285static void
286get_feature_cb(void *cb_arg, const struct spdk_nvme_cpl *cpl)
287{
288 struct dev *dev = cb_arg;
289
290 if (spdk_nvme_cpl_is_error(cpl)) {
291 printf("%s: get number of queues failed\n", dev->name);
292 failed = 1;
293 return;
294 }
295
296 get_queues_done++;
297}
298
299static void
300get_feature_test(struct dev *dev)
301{
302 struct spdk_nvme_cmd cmd[ADMINQ_SIZE];
303 int i;
304
305 memset(cmd, 0, sizeof(cmd));
306 for (i = 0; i < ADMINQ_SIZE; i++) {
307 cmd[i].opc = SPDK_NVME_OPC_GET_FEATURES;
308 cmd[i].cdw10 = SPDK_NVME_FEAT_NUMBER_OF_QUEUES;
309 if (spdk_nvme_ctrlr_cmd_admin_raw(dev->ctrlr, &cmd[i], NULL, 0,
310 get_feature_cb, dev) != 0) {
311 printf("Failed to send identify ctrlr command for dev=%p\n", dev);
312 failed = 1;
313 return;
314 }
315 }
316}
317
318int main(int argc, char **argv)
319{
320 struct dev *dev;
321 int i;
322 struct spdk_env_opts opts;
323 int rc;
324
325 rc = parse_args(argc, argv);
326 if (rc != 0) {
327 return rc;
328 }
329
330 spdk_env_opts_init(&opts);
331 opts.name = "aer";
332 opts.core_mask = "0x1";
333 spdk_env_init(&opts);
334
335 printf("Asynchronous Event Request test\n");
336
337 if (spdk_nvme_probe(&g_trid, NULL, probe_cb, attach_cb, NULL) != 0) {
338 fprintf(stderr, "spdk_nvme_probe() failed\n");
339 return 1;
340 }
341
342 if (failed) {
343 goto done;
344 }
345
346 printf("Registering asynchronous event callbacks...\n");
347 foreach_dev(dev) {
348 spdk_nvme_ctrlr_register_aer_callback(dev->ctrlr, aer_cb, dev);
349 }
350
351 printf("Getting temperature thresholds of all controllers...\n");
352 foreach_dev(dev) {
353 /* Get the original temperature threshold */
354 get_temp_threshold(dev);
355 }
356
357 while (!failed && temperature_done < num_devs) {
358 foreach_dev(dev) {
359 spdk_nvme_ctrlr_process_admin_completions(dev->ctrlr);
360 }
361 }
362
363 if (failed) {
364 goto done;
365 }
366 temperature_done = 0;
367
368 /* Send enough admin commands to fill admin queue before triggering AER */
369 foreach_dev(dev) {
370 get_feature_test(dev);
371 }
372
373 if (failed) {
374 goto done;
375 }
376
377 printf("Waiting for all controllers to trigger AER...\n");
378 foreach_dev(dev) {
379 /* Set the temperature threshold to a low value */
380 set_temp_threshold(dev, 200);
381 }
382
383 /* Send enough admin commands to fill admin queue while waiting AER to be triggered */
384 foreach_dev(dev) {
385 get_feature_test(dev);
386 }
387
388 if (failed) {
389 goto done;
390 }
391
392 while (!failed && ((aer_done < num_devs) || (temperature_done < num_devs) ||
393 (get_queues_done < (2 * ADMINQ_SIZE * num_devs)))) {
394 foreach_dev(dev) {
395 spdk_nvme_ctrlr_process_admin_completions(dev->ctrlr);
396 }
397 }
398
399 if (failed) {
400 goto done;
401 }
402
403 printf("Cleaning up...\n");
404
405 for (i = 0; i < num_devs; i++) {
406 struct dev *dev = &devs[i];
407
408 spdk_nvme_detach(dev->ctrlr);
409 }
410
411done:
412 cleanup();
413
414 return failed;
415}