4 * Copyright (c) Intel Corporation.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 #include <sys/types.h>
39 #include <netinet/in.h>
40 #include <arpa/inet.h>
43 #include <rte_config.h>
44 #include <rte_lcore.h>
48 #include "spdk/conf.h"
50 #include "spdk/bdev.h"
51 #include "spdk/nvme.h"
52 #include "spdk/nvmf.h"
53 #include "spdk/string.h"
54 #include "spdk/util.h"
/* MAX_LISTEN_ADDRESSES bounds the "Listen" entries parsed per subsystem section. */
56 #define MAX_LISTEN_ADDRESSES 255
58 #define PORTNUMSTRLEN 32
59 #define SPDK_NVMF_DEFAULT_SIN_PORT ((uint16_t)4420)
/* Used as the AcceptorPollRate value when the configured one is negative. */
61 #define ACCEPT_TIMEOUT_US 10000 /* 10ms */
/*
 * Context handed to spdk_nvme_probe() and received back as cb_ctx by
 * probe_cb() and attach_cb().
 * NOTE(review): only part of the struct is visible in this listing; probe_cb()
 * also reads ctx->any and ctx->found, whose declarations are not shown here.
 */
63 struct spdk_nvmf_probe_ctx
{
/* Target subsystem that attach_cb() adds the attached controller to. */
64 struct nvmf_tgt_subsystem
*app_subsystem
;
/* Transport ID whose traddr probe_cb() compares with each probed device. */
67 struct spdk_nvme_transport_id trid
;
/* Size of the scratch buffers used for sysfs paths and for the line read from them. */
70 #define MAX_STRING_LEN 255
/* Default and clamp range applied to the "MaxQueuesPerSession" setting. */
72 #define SPDK_NVMF_CONFIG_QUEUES_PER_SESSION_DEFAULT 4
73 #define SPDK_NVMF_CONFIG_QUEUES_PER_SESSION_MIN 2
74 #define SPDK_NVMF_CONFIG_QUEUES_PER_SESSION_MAX 1024
/* Default and clamp range applied to the "MaxQueueDepth" setting. */
76 #define SPDK_NVMF_CONFIG_QUEUE_DEPTH_DEFAULT 128
77 #define SPDK_NVMF_CONFIG_QUEUE_DEPTH_MIN 16
78 #define SPDK_NVMF_CONFIG_QUEUE_DEPTH_MAX 1024
/* Default and clamp range applied to the "InCapsuleDataSize" setting. */
80 #define SPDK_NVMF_CONFIG_IN_CAPSULE_DATA_SIZE_DEFAULT 4096
81 #define SPDK_NVMF_CONFIG_IN_CAPSULE_DATA_SIZE_MIN 4096
82 #define SPDK_NVMF_CONFIG_IN_CAPSULE_DATA_SIZE_MAX 131072
/* Default and clamp range applied to the "MaxIOSize" setting. */
84 #define SPDK_NVMF_CONFIG_MAX_IO_SIZE_DEFAULT 131072
85 #define SPDK_NVMF_CONFIG_MAX_IO_SIZE_MIN 4096
86 #define SPDK_NVMF_CONFIG_MAX_IO_SIZE_MAX 131072
/* Receives acceptor_lcore and acceptor_poll_rate from spdk_nvmf_parse_nvmf_tgt(). */
88 struct spdk_nvmf_tgt_conf g_spdk_nvmf_tgt_conf
;
/*
 * Pre-incremented in spdk_nvmf_construct_subsystem() to choose a starting core;
 * starts at -1 so that the first increment yields 0.
 */
89 static int32_t g_last_core
= -1;
/*
 * Read a NUMA node number from the file at `path`.
 *
 * Opens `path` for reading, reads one line with fgets() and parses it as a
 * base-10 number with strtoul().
 * NOTE(review): the return statement, the fopen() failure handling and any
 * fclose() call are not visible in this listing — confirm in the full source.
 */
92 spdk_get_numa_node_value(const char *path
)
96 char buf
[MAX_STRING_LEN
];
98 fd
= fopen(path
, "r");
103 if (fgets(buf
, sizeof(buf
), fd
) != NULL
) {
/* strtoul() is given a NULL endptr, so a non-numeric line cannot be told apart from 0. */
104 numa_node
= strtoul(buf
, NULL
, 10);
/*
 * Look up the NUMA node of the network interface that owns `if_addr`.
 *
 * `if_addr` is converted with inet_addr(), the interface list is obtained with
 * getifaddrs(), and each entry's IPv4 address is compared with the converted
 * one. The node is read from /sys/class/net/<ifname>/device/numa_node via
 * spdk_get_numa_node_value(). The list is released with freeifaddrs().
 * NOTE(review): return statements and the getifaddrs() failure path are not
 * visible in this listing.
 */
112 spdk_get_ifaddr_numa_node(const char *if_addr
)
115 struct ifaddrs
*ifaddrs
, *ifa
;
116 struct sockaddr_in addr
, addr_in
;
117 char path
[MAX_STRING_LEN
];
120 addr_in
.sin_addr
.s_addr
= inet_addr(if_addr
);
122 ret
= getifaddrs(&ifaddrs
);
126 for (ifa
= ifaddrs
; ifa
!= NULL
; ifa
= ifa
->ifa_next
) {
/*
 * NOTE(review): ifa->ifa_addr is dereferenced as a sockaddr_in with no visible
 * NULL check or address-family check; getifaddrs() documents that ifa_addr may
 * be NULL — confirm against the full source.
 */
127 addr
= *(struct sockaddr_in
*)ifa
->ifa_addr
;
/* Address mismatch; the body of this branch is not visible in this listing. */
128 if ((uint32_t)addr_in
.sin_addr
.s_addr
!= (uint32_t)addr
.sin_addr
.s_addr
) {
131 snprintf(path
, MAX_STRING_LEN
, "/sys/class/net/%s/device/numa_node", ifa
->ifa_name
);
132 numa_node
= spdk_get_numa_node_value(path
);
135 freeifaddrs(ifaddrs
);
/*
 * Create and start the discovery subsystem.
 *
 * The subsystem is created with the discovery NQN and subtype, in direct mode,
 * on the master lcore. An error is logged when creation returns NULL.
 * NOTE(review): the return values are not visible in this listing.
 */
141 spdk_add_nvmf_discovery_subsystem(void)
143 struct nvmf_tgt_subsystem
*app_subsys
;
145 app_subsys
= nvmf_tgt_create_subsystem(SPDK_NVMF_DISCOVERY_NQN
, SPDK_NVMF_SUBTYPE_DISCOVERY
,
146 NVMF_SUBSYSTEM_MODE_DIRECT
,
147 rte_get_master_lcore());
148 if (app_subsys
== NULL
) {
149 SPDK_ERRLOG("Failed creating discovery nvmf library subsystem\n");
153 nvmf_tgt_start_subsystem(app_subsys
);
/*
 * Parse the "Nvmf" configuration section and initialize the NVMe-oF target.
 *
 * Reads MaxQueueDepth, MaxQueuesPerSession, InCapsuleDataSize, MaxIOSize,
 * AcceptorCore and AcceptorPollRate. Negative values are replaced by defaults
 * and the four size/count settings are clamped to their MIN/MAX macros. It
 * then calls spdk_nvmf_tgt_init() and spdk_add_nvmf_discovery_subsystem().
 * NOTE(review): return statements and several guarding conditions are not
 * visible in this listing.
 */
159 spdk_nvmf_parse_nvmf_tgt(void)
161 struct spdk_conf_section
*sp
;
163 int max_queues_per_sess
;
164 int in_capsule_data_size
;
167 int acceptor_poll_rate
;
170 sp
= spdk_conf_find_section(NULL
, "Nvmf");
/* The condition guarding this error is not visible in this listing. */
172 SPDK_ERRLOG("No Nvmf section in configuration file.\n");
/* MaxQueueDepth: a negative value selects the default, then clamp to [MIN, MAX]. */
176 max_queue_depth
= spdk_conf_section_get_intval(sp
, "MaxQueueDepth");
177 if (max_queue_depth
< 0) {
178 max_queue_depth
= SPDK_NVMF_CONFIG_QUEUE_DEPTH_DEFAULT
;
180 max_queue_depth
= spdk_max(max_queue_depth
, SPDK_NVMF_CONFIG_QUEUE_DEPTH_MIN
);
181 max_queue_depth
= spdk_min(max_queue_depth
, SPDK_NVMF_CONFIG_QUEUE_DEPTH_MAX
);
/* MaxQueuesPerSession: same default-then-clamp handling. */
183 max_queues_per_sess
= spdk_conf_section_get_intval(sp
, "MaxQueuesPerSession");
184 if (max_queues_per_sess
< 0) {
185 max_queues_per_sess
= SPDK_NVMF_CONFIG_QUEUES_PER_SESSION_DEFAULT
;
187 max_queues_per_sess
= spdk_max(max_queues_per_sess
, SPDK_NVMF_CONFIG_QUEUES_PER_SESSION_MIN
);
188 max_queues_per_sess
= spdk_min(max_queues_per_sess
, SPDK_NVMF_CONFIG_QUEUES_PER_SESSION_MAX
);
/* InCapsuleDataSize: an explicitly configured value must be a multiple of 16. */
190 in_capsule_data_size
= spdk_conf_section_get_intval(sp
, "InCapsuleDataSize");
191 if (in_capsule_data_size
< 0) {
192 in_capsule_data_size
= SPDK_NVMF_CONFIG_IN_CAPSULE_DATA_SIZE_DEFAULT
;
193 } else if ((in_capsule_data_size
% 16) != 0) {
194 SPDK_ERRLOG("InCapsuleDataSize must be a multiple of 16\n");
197 in_capsule_data_size
= spdk_max(in_capsule_data_size
, SPDK_NVMF_CONFIG_IN_CAPSULE_DATA_SIZE_MIN
);
198 in_capsule_data_size
= spdk_min(in_capsule_data_size
, SPDK_NVMF_CONFIG_IN_CAPSULE_DATA_SIZE_MAX
);
/* MaxIOSize: an explicitly configured value must be a multiple of 4096. */
200 max_io_size
= spdk_conf_section_get_intval(sp
, "MaxIOSize");
201 if (max_io_size
< 0) {
202 max_io_size
= SPDK_NVMF_CONFIG_MAX_IO_SIZE_DEFAULT
;
203 } else if ((max_io_size
% 4096) != 0) {
204 SPDK_ERRLOG("MaxIOSize must be a multiple of 4096\n");
207 max_io_size
= spdk_max(max_io_size
, SPDK_NVMF_CONFIG_MAX_IO_SIZE_MIN
);
208 max_io_size
= spdk_min(max_io_size
, SPDK_NVMF_CONFIG_MAX_IO_SIZE_MAX
);
/* AcceptorCore: a negative value falls back to the lcore running this function. */
210 acceptor_lcore
= spdk_conf_section_get_intval(sp
, "AcceptorCore");
211 if (acceptor_lcore
< 0) {
212 acceptor_lcore
= rte_lcore_id();
214 g_spdk_nvmf_tgt_conf
.acceptor_lcore
= acceptor_lcore
;
/* AcceptorPollRate: a negative value falls back to ACCEPT_TIMEOUT_US. */
216 acceptor_poll_rate
= spdk_conf_section_get_intval(sp
, "AcceptorPollRate");
217 if (acceptor_poll_rate
< 0) {
218 acceptor_poll_rate
= ACCEPT_TIMEOUT_US
;
220 g_spdk_nvmf_tgt_conf
.acceptor_poll_rate
= acceptor_poll_rate
;
/* Initialize the target library with the validated limits. */
222 rc
= spdk_nvmf_tgt_init(max_queue_depth
, max_queues_per_sess
, in_capsule_data_size
, max_io_size
);
224 SPDK_ERRLOG("spdk_nvmf_tgt_init() failed\n");
228 rc
= spdk_add_nvmf_discovery_subsystem();
230 SPDK_ERRLOG("spdk_add_nvmf_discovery_subsystem failed\n");
/*
 * Probe callback passed to spdk_nvme_probe().
 *
 * cb_ctx is the struct spdk_nvmf_probe_ctx supplied by the caller.
 * NOTE(review): the return statements are not visible in this listing, so the
 * accept/skip result of each branch cannot be documented from here.
 */
238 probe_cb(void *cb_ctx
, const struct spdk_nvme_transport_id
*trid
,
239 struct spdk_nvme_ctrlr_opts
*opts
)
241 struct spdk_nvmf_probe_ctx
*ctx
= cb_ctx
;
/* Branch taken when ctx->any is set and ctx->found is not yet set. */
243 if (ctx
->any
&& !ctx
->found
) {
/* Exact match of the probed device address against the requested traddr. */
248 if (strcmp(trid
->traddr
, ctx
->trid
.traddr
) == 0) {
/*
 * Attach callback passed to spdk_nvme_probe().
 *
 * Parses the PCI address from trid->traddr, logs the attachment, and warns
 * when the device's socket differs from the socket of the subsystem's lcore.
 * It then adds the controller to ctx->app_subsystem->subsystem with
 * nvmf_subsystem_add_ctrlr().
 * NOTE(review): some declarations and the condition guarding the final error
 * log are not visible in this listing.
 */
257 attach_cb(void *cb_ctx
, const struct spdk_nvme_transport_id
*trid
,
258 struct spdk_nvme_ctrlr
*ctrlr
, const struct spdk_nvme_ctrlr_opts
*opts
)
260 struct spdk_nvmf_probe_ctx
*ctx
= cb_ctx
;
263 struct spdk_pci_addr pci_addr
;
264 struct spdk_pci_device
*pci_dev
;
/* The return value of spdk_pci_addr_parse() is not checked on this line. */
266 spdk_pci_addr_parse(&pci_addr
, trid
->traddr
);
268 SPDK_NOTICELOG("Attaching NVMe device %p at %s to subsystem %s\n",
271 spdk_nvmf_subsystem_get_nqn(ctx
->app_subsystem
->subsystem
));
273 pci_dev
= spdk_pci_get_device(&pci_addr
);
275 numa_node
= spdk_pci_device_get_socket_id(pci_dev
);
/* The NUMA comparison runs only when the reported socket id is non-negative. */
277 if (numa_node
>= 0) {
278 /* Check whether the subsystem's core and the NVMe device are on the same socket. */
279 if (spdk_env_get_socket_id(ctx
->app_subsystem
->lcore
) != (unsigned)numa_node
) {
280 SPDK_WARNLOG("Subsystem %s is configured to run on a CPU core %u belonging "
281 "to a different NUMA node than the associated NVMe device. "
282 "This may result in reduced performance.\n",
283 spdk_nvmf_subsystem_get_nqn(ctx
->app_subsystem
->subsystem
),
284 ctx
->app_subsystem
->lcore
);
/*
 * NOTE(review): numa_node is compared with >= 0 and cast to unsigned above but
 * printed with %u here; its declaration is not visible — confirm the format.
 */
285 SPDK_WARNLOG("The NVMe device is on socket %u\n", numa_node
);
286 SPDK_WARNLOG("The Subsystem is on socket %u\n",
287 spdk_env_get_socket_id(ctx
->app_subsystem
->lcore
));
291 rc
= nvmf_subsystem_add_ctrlr(ctx
->app_subsystem
->subsystem
, ctrlr
, &pci_addr
);
293 SPDK_ERRLOG("Failed to add controller to subsystem\n");
/*
 * Validate a serial number string.
 *
 * Logs an error when `len` exceeds MAX_SN_LEN.
 * NOTE(review): how `len` is computed, any other checks and the return values
 * are not visible in this listing.
 */
298 spdk_nvmf_validate_sn(const char *sn
)
303 if (len
> MAX_SN_LEN
) {
304 SPDK_ERRLOG("Invalid sn \"%s\": length %zu > max %d\n", sn
, len
, MAX_SN_LEN
);
/*
 * Search `mask` for a set bit, starting at bit `lcore`.
 *
 * Each iteration tests bit `lcore` of `mask`, then advances `lcore` by one
 * modulo 64, until `lcore` equals `end`.
 * NOTE(review): the assignment of `end`, the action taken on a set bit and the
 * return statement are not visible in this listing.
 */
312 spdk_nvmf_allocate_lcore(uint64_t mask
, uint32_t lcore
)
/* assumes lcore < 64 on entry; shifting a 64-bit value by 64 or more is undefined — TODO confirm */
323 if (((mask
>> lcore
) & 1U) == 1U) {
326 lcore
= (lcore
+ 1) % 64;
327 } while (lcore
!= end
);
/*
 * Parse one "Subsystem" configuration section and construct the subsystem.
 *
 * Collects NQN, Mode, Core, the Listen entries (transport plus address string),
 * Host entries, the NVMe and SN values and the Namespace entries. The result
 * is passed to spdk_nvmf_construct_subsystem(). The duplicated listen-address
 * strings are freed before returning.
 * NOTE(review): loop exits, counters and return statements are not visible in
 * this listing.
 */
333 spdk_nvmf_parse_subsystem(struct spdk_conf_section
*sp
)
335 const char *nqn
, *mode_str
;
338 int num_listen_addrs
;
339 struct rpc_listen_address listen_addrs
[MAX_LISTEN_ADDRESSES
];
/* Zero-initialized so the cleanup loop at the end can free() every slot. */
340 char *listen_addrs_str
[MAX_LISTEN_ADDRESSES
] = {};
342 char *hosts
[MAX_HOSTS
];
346 char *devs
[MAX_VIRTUAL_NAMESPACE
];
348 nqn
= spdk_conf_section_get_val(sp
, "NQN");
349 mode_str
= spdk_conf_section_get_val(sp
, "Mode");
350 lcore
= spdk_conf_section_get_intval(sp
, "Core");
352 /* Parse Listen sections */
353 num_listen_addrs
= 0;
354 for (i
= 0; i
< MAX_LISTEN_ADDRESSES
; i
++) {
/* Column 0 of the i-th Listen line is the transport name. */
355 listen_addrs
[num_listen_addrs
].transport
=
356 spdk_conf_section_get_nmval(sp
, "Listen", i
, 0);
357 if (!listen_addrs
[num_listen_addrs
].transport
) {
/* Column 1 of the i-th Listen line is the address string. */
361 listen_addrs_str
[i
] = spdk_conf_section_get_nmval(sp
, "Listen", i
, 1);
362 if (!listen_addrs_str
[i
]) {
/*
 * Replace the config-owned string with a private copy before parsing.
 * NOTE(review): no NULL check of the strdup() result is visible in this listing.
 */
366 listen_addrs_str
[i
] = strdup(listen_addrs_str
[i
]);
368 ret
= spdk_parse_ip_addr(listen_addrs_str
[i
], &listen_addrs
[num_listen_addrs
].traddr
,
369 &listen_addrs
[num_listen_addrs
].trsvcid
);
/* On a parse failure the copy is released and the slot reset to NULL. */
371 SPDK_ERRLOG("Unable to parse listen address '%s'\n", listen_addrs_str
[i
]);
372 free(listen_addrs_str
[i
]);
373 listen_addrs_str
[i
] = NULL
;
380 /* Parse Host sections */
381 for (i
= 0; i
< MAX_HOSTS
; i
++) {
382 hosts
[i
] = spdk_conf_section_get_nval(sp
, "Host", i
);
389 bdf
= spdk_conf_section_get_val(sp
, "NVMe");
390 sn
= spdk_conf_section_get_val(sp
, "SN");
/* Collect the first column of each Namespace line. */
393 for (i
= 0; i
< MAX_VIRTUAL_NAMESPACE
; i
++) {
394 devs
[i
] = spdk_conf_section_get_nmval(sp
, "Namespace", i
, 0);
402 ret
= spdk_nvmf_construct_subsystem(nqn
, mode_str
, lcore
,
403 num_listen_addrs
, listen_addrs
,
/* Release every duplicated listen-address string; unused slots are NULL. */
408 for (i
= 0; i
< MAX_LISTEN_ADDRESSES
; i
++) {
409 free(listen_addrs_str
[i
]);
/*
 * Walk the configuration sections and parse each one whose name has the
 * "Subsystem" prefix with spdk_nvmf_parse_subsystem().
 * NOTE(review): the loop header, the handling of `rc` and the return statement
 * are not visible in this listing.
 */
416 spdk_nvmf_parse_subsystems(void)
419 struct spdk_conf_section
*sp
;
421 sp
= spdk_conf_first_section(NULL
);
423 if (spdk_conf_section_match_prefix(sp
, "Subsystem")) {
424 rc
= spdk_nvmf_parse_subsystem(sp
);
429 sp
= spdk_conf_next_section(sp
);
/*
 * Parse the "Nvmf" section first, then the subsystem sections.
 * NOTE(review): the checks on `rc` and the return statements are not visible
 * in this listing.
 */
435 spdk_nvmf_parse_conf(void)
440 rc
= spdk_nvmf_parse_nvmf_tgt();
445 /* Subsystem sections */
446 rc
= spdk_nvmf_parse_subsystems();
/*
 * Create, configure and start one NVMe-oF subsystem.
 *
 * name                  subsystem NQN, passed to nvmf_tgt_create_subsystem()
 * mode_str              "Direct" or "Virtual", compared case-insensitively
 * lcore                 requested core, passed through spdk_nvmf_allocate_lcore()
 * num_listen_addresses  number of entries in `addresses`
 * addresses             transport / traddr / trsvcid triples to listen on
 * num_hosts, hosts      hosts passed to spdk_nvmf_subsystem_add_host()
 * bdf                   PCI address for Direct mode; "*" is compared specially
 * sn                    serial number, validated with spdk_nvmf_validate_sn()
 * num_devs, dev_list    bdev names added as namespaces
 *
 * NOTE(review): many guarding conditions, the else branches and all return
 * statements are not visible in this listing, so return values and the exact
 * error paths are not documented here.
 */
455 spdk_nvmf_construct_subsystem(const char *name
,
456 const char *mode_str
, int32_t lcore
,
457 int num_listen_addresses
, struct rpc_listen_address
*addresses
,
458 int num_hosts
, char *hosts
[], const char *bdf
,
459 const char *sn
, int num_devs
, char *dev_list
[])
461 struct spdk_nvmf_subsystem
*subsystem
;
462 struct nvmf_tgt_subsystem
*app_subsys
;
463 struct spdk_nvmf_listen_addr
*listen_addr
;
464 enum spdk_nvmf_subsystem_mode mode
;
469 SPDK_ERRLOG("No NQN specified for subsystem\n");
473 if (num_listen_addresses
> MAX_LISTEN_ADDRESSES
) {
474 SPDK_ERRLOG("invalid listen adresses number\n");
478 if (num_hosts
> MAX_HOSTS
) {
479 SPDK_ERRLOG("invalid hosts number\n");
/* Take the next value of g_last_core; the guarding condition is not visible in this listing. */
484 lcore
= ++g_last_core
;
487 /* Determine which core to assign to the subsystem */
488 mask
= spdk_app_get_core_mask();
489 lcore
= spdk_nvmf_allocate_lcore(mask
, lcore
);
492 /* Determine the mode the subsystem will operate in */
493 if (mode_str
== NULL
) {
494 SPDK_ERRLOG("No Mode specified for Subsystem %s\n", name
);
498 if (strcasecmp(mode_str
, "Direct") == 0) {
499 mode
= NVMF_SUBSYSTEM_MODE_DIRECT
;
500 } else if (strcasecmp(mode_str
, "Virtual") == 0) {
501 mode
= NVMF_SUBSYSTEM_MODE_VIRTUAL
;
503 SPDK_ERRLOG("Invalid Subsystem mode: %s\n", mode_str
);
507 app_subsys
= nvmf_tgt_create_subsystem(name
, SPDK_NVMF_SUBTYPE_NVME
,
509 if (app_subsys
== NULL
) {
510 SPDK_ERRLOG("Subsystem creation failed\n");
513 subsystem
= app_subsys
->subsystem
;
515 /* Parse Listen sections */
516 for (i
= 0; i
< num_listen_addresses
; i
++) {
/* Warn when the NIC owning this address is on another socket than the subsystem's core. */
517 int nic_numa_node
= spdk_get_ifaddr_numa_node(addresses
[i
].traddr
);
518 unsigned subsys_numa_node
= spdk_env_get_socket_id(app_subsys
->lcore
);
520 if (nic_numa_node
>= 0) {
521 if (subsys_numa_node
!= (unsigned)nic_numa_node
) {
522 SPDK_WARNLOG("Subsystem %s is configured to run on a CPU core %d belonging "
523 "to a different NUMA node than the associated NIC. "
524 "This may result in reduced performance.\n",
526 SPDK_WARNLOG("The NIC is on socket %d\n", nic_numa_node
);
527 SPDK_WARNLOG("The Subsystem is on socket %u\n",
532 listen_addr
= spdk_nvmf_tgt_listen(addresses
[i
].transport
,
533 addresses
[i
].traddr
, addresses
[i
].trsvcid
);
534 if (listen_addr
== NULL
) {
535 SPDK_ERRLOG("Failed to listen on transport %s, traddr %s, trsvcid %s\n",
536 addresses
[i
].transport
,
538 addresses
[i
].trsvcid
);
541 spdk_nvmf_subsystem_add_listener(subsystem
, listen_addr
);
544 /* Parse Host sections */
545 for (i
= 0; i
< num_hosts
; i
++) {
546 spdk_nvmf_subsystem_add_host(subsystem
, hosts
[i
]);
/* Direct mode: probe NVMe controllers over PCIe; attach_cb adds each attached one to the subsystem. */
549 if (mode
== NVMF_SUBSYSTEM_MODE_DIRECT
) {
550 struct spdk_nvmf_probe_ctx ctx
= { 0 };
551 struct spdk_nvme_transport_id trid
= {};
552 struct spdk_pci_addr pci_addr
= {};
555 SPDK_ERRLOG("Subsystem %s: missing NVMe directive\n", name
);
560 SPDK_ERRLOG("Subsystem %s: Namespaces not allowed for Direct mode\n", name
);
564 trid
.trtype
= SPDK_NVME_TRANSPORT_PCIE
;
565 ctx
.app_subsystem
= app_subsys
;
/* "*" is handled separately from a concrete BDF; the body of this branch is not visible here. */
567 if (strcmp(bdf
, "*") == 0) {
570 if (spdk_pci_addr_parse(&pci_addr
, bdf
) < 0) {
571 SPDK_ERRLOG("Invalid format for NVMe BDF: %s\n", bdf
);
/* Write the parsed address into trid.traddr in formatted form. */
575 spdk_pci_addr_fmt(trid
.traddr
, sizeof(trid
.traddr
), &pci_addr
);
579 if (spdk_nvme_probe(&trid
, &ctx
, probe_cb
, attach_cb
, NULL
)) {
580 SPDK_ERRLOG("One or more controllers failed in spdk_nvme_probe()\n");
584 SPDK_ERRLOG("Could not find NVMe controller at PCI address %04x:%02x:%02x.%x\n",
585 pci_addr
.domain
, pci_addr
.bus
, pci_addr
.dev
, pci_addr
.func
);
/* NOTE(review): presumably the Virtual-mode branch starts here; the else line is not visible. */
589 struct spdk_bdev
*bdev
;
590 const char *namespace;
593 SPDK_ERRLOG("Subsystem %s: missing serial number\n", name
);
596 if (spdk_nvmf_validate_sn(sn
) != 0) {
600 if (num_devs
> MAX_VIRTUAL_NAMESPACE
) {
604 subsystem
->dev
.virt
.ns_count
= 0;
605 snprintf(subsystem
->dev
.virt
.sn
, MAX_SN_LEN
, "%s", sn
);
/* Resolve each named bdev and add it to the subsystem as a namespace. */
607 for (i
= 0; i
< num_devs
; i
++) {
608 namespace = dev_list
[i
];
610 SPDK_ERRLOG("Namespace %d: missing block device\n", i
);
613 bdev
= spdk_bdev_get_by_name(namespace);
615 SPDK_ERRLOG("Could not find namespace bdev '%s'\n", namespace);
618 if (spdk_nvmf_subsystem_add_ns(subsystem
, bdev
)) {
622 SPDK_NOTICELOG("Attaching block device %s to subsystem %s\n",
623 bdev
->name
, subsystem
->subnqn
);
628 nvmf_tgt_start_subsystem(app_subsys
);
/* NOTE(review): presumably the error-cleanup path; the label or condition is not visible here. */
633 spdk_nvmf_delete_subsystem(app_subsys
->subsystem
);
634 app_subsys
->subsystem
= NULL
;