]> git.proxmox.com Git - ceph.git/blame - ceph/src/spdk/lib/event/subsystems/nvmf/nvmf_tgt.c
import 15.2.0 Octopus source
[ceph.git] / ceph / src / spdk / lib / event / subsystems / nvmf / nvmf_tgt.c
CommitLineData
11fdf7f2
TL
1/*-
2 * BSD LICENSE
3 *
4 * Copyright (c) Intel Corporation.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34#include "event_nvmf.h"
35
36#include "spdk/bdev.h"
37#include "spdk/event.h"
38#include "spdk/thread.h"
39#include "spdk/log.h"
40#include "spdk/nvme.h"
41#include "spdk/util.h"
42
/*
 * Lifecycle states of the NVMe-oF target subsystem.
 *
 * The numeric order matters: _spdk_nvmf_shutdown_cb() compares the current
 * state against NVMF_TGT_RUNNING with '<' and '>', so all init states must
 * sort before NVMF_TGT_RUNNING and all fini/terminal states after it.
 */
enum nvmf_tgt_state {
	/* Start-up sequence */
	NVMF_TGT_INIT_NONE			= 0,
	NVMF_TGT_INIT_PARSE_CONFIG		= 1,
	NVMF_TGT_INIT_CREATE_POLL_GROUPS	= 2,
	NVMF_TGT_INIT_START_SUBSYSTEMS		= 3,
	NVMF_TGT_INIT_START_ACCEPTOR		= 4,

	/* Steady state */
	NVMF_TGT_RUNNING			= 5,

	/* Tear-down sequence */
	NVMF_TGT_FINI_STOP_SUBSYSTEMS		= 6,
	NVMF_TGT_FINI_DESTROY_POLL_GROUPS	= 7,
	NVMF_TGT_FINI_STOP_ACCEPTOR		= 8,
	NVMF_TGT_FINI_FREE_RESOURCES		= 9,

	/* Terminal states */
	NVMF_TGT_STOPPED			= 10,
	NVMF_TGT_ERROR				= 11,
};
57
/* One poll group record per thread; entries live on the g_poll_groups list. */
struct nvmf_tgt_poll_group {
	/* Result of spdk_nvmf_poll_group_create(); new_qpair() checks it for NULL */
	struct spdk_nvmf_poll_group		*group;
	/* Thread on which this record was created */
	struct spdk_thread			*thread;
	/* List linkage */
	TAILQ_ENTRY(nvmf_tgt_poll_group)	link;
};
63
64struct nvmf_tgt_host_trid {
65 struct spdk_nvme_transport_id host_trid;
9f95a23c 66 struct nvmf_tgt_poll_group *pg;
11fdf7f2
TL
67 uint32_t ref;
68 TAILQ_ENTRY(nvmf_tgt_host_trid) link;
69};
70
71/* List of host trids that are connected to the target */
72static TAILQ_HEAD(, nvmf_tgt_host_trid) g_nvmf_tgt_host_trids =
73 TAILQ_HEAD_INITIALIZER(g_nvmf_tgt_host_trids);
74
75struct spdk_nvmf_tgt *g_spdk_nvmf_tgt = NULL;
76
77static enum nvmf_tgt_state g_tgt_state;
78
9f95a23c
TL
79/* Round-Robin/IP-based tracking of threads to poll group assignment */
80static struct nvmf_tgt_poll_group *g_next_poll_group = NULL;
11fdf7f2 81
9f95a23c 82static TAILQ_HEAD(, nvmf_tgt_poll_group) g_poll_groups = TAILQ_HEAD_INITIALIZER(g_poll_groups);
11fdf7f2
TL
83static size_t g_num_poll_groups = 0;
84
85static struct spdk_poller *g_acceptor_poller = NULL;
86
87static void nvmf_tgt_advance_state(void);
88
89static void
9f95a23c 90_spdk_nvmf_shutdown_cb(void *arg1)
11fdf7f2
TL
91{
92 /* Still in initialization state, defer shutdown operation */
93 if (g_tgt_state < NVMF_TGT_RUNNING) {
9f95a23c 94 spdk_thread_send_msg(spdk_get_thread(), _spdk_nvmf_shutdown_cb, NULL);
11fdf7f2
TL
95 return;
96 } else if (g_tgt_state > NVMF_TGT_RUNNING) {
97 /* Already in Shutdown status, ignore the signal */
98 return;
99 }
100
101 g_tgt_state = NVMF_TGT_FINI_STOP_SUBSYSTEMS;
102 nvmf_tgt_advance_state();
103}
104
/* Subsystem .fini hook: delegates to the shutdown callback with no argument. */
static void
spdk_nvmf_subsystem_fini(void)
{
	_spdk_nvmf_shutdown_cb(NULL);
}
110
9f95a23c
TL
111/* Round robin selection of poll groups */
112static struct nvmf_tgt_poll_group *
113spdk_nvmf_get_next_pg(void)
11fdf7f2 114{
9f95a23c 115 struct nvmf_tgt_poll_group *pg;
11fdf7f2 116
9f95a23c
TL
117 pg = g_next_poll_group;
118 g_next_poll_group = TAILQ_NEXT(pg, link);
119 if (g_next_poll_group == NULL) {
120 g_next_poll_group = TAILQ_FIRST(&g_poll_groups);
11fdf7f2
TL
121 }
122
9f95a23c 123 return pg;
11fdf7f2
TL
124}
125
126static void
127nvmf_tgt_remove_host_trid(struct spdk_nvmf_qpair *qpair)
128{
129 struct spdk_nvme_transport_id trid_to_remove;
130 struct nvmf_tgt_host_trid *trid = NULL, *tmp_trid = NULL;
131
132 if (g_spdk_nvmf_tgt_conf->conn_sched != CONNECT_SCHED_HOST_IP) {
133 return;
134 }
135
136 if (spdk_nvmf_qpair_get_peer_trid(qpair, &trid_to_remove) != 0) {
137 return;
138 }
139
140 TAILQ_FOREACH_SAFE(trid, &g_nvmf_tgt_host_trids, link, tmp_trid) {
141 if (trid && !strncmp(trid->host_trid.traddr,
142 trid_to_remove.traddr, SPDK_NVMF_TRADDR_MAX_LEN + 1)) {
143 trid->ref--;
144 if (trid->ref == 0) {
145 TAILQ_REMOVE(&g_nvmf_tgt_host_trids, trid, link);
146 free(trid);
147 }
148
149 break;
150 }
151 }
152
153 return;
154}
155
9f95a23c
TL
156static struct nvmf_tgt_poll_group *
157nvmf_tgt_get_pg(struct spdk_nvmf_qpair *qpair)
11fdf7f2
TL
158{
159 struct spdk_nvme_transport_id trid;
160 struct nvmf_tgt_host_trid *tmp_trid = NULL, *new_trid = NULL;
9f95a23c 161 struct nvmf_tgt_poll_group *pg;
11fdf7f2 162 int ret;
11fdf7f2
TL
163
164 switch (g_spdk_nvmf_tgt_conf->conn_sched) {
165 case CONNECT_SCHED_HOST_IP:
166 ret = spdk_nvmf_qpair_get_peer_trid(qpair, &trid);
167 if (ret) {
9f95a23c
TL
168 pg = g_next_poll_group;
169 SPDK_ERRLOG("Invalid host transport Id. Assigning to poll group %p\n", pg);
11fdf7f2
TL
170 break;
171 }
172
173 TAILQ_FOREACH(tmp_trid, &g_nvmf_tgt_host_trids, link) {
174 if (tmp_trid && !strncmp(tmp_trid->host_trid.traddr,
175 trid.traddr, SPDK_NVMF_TRADDR_MAX_LEN + 1)) {
176 tmp_trid->ref++;
9f95a23c 177 pg = tmp_trid->pg;
11fdf7f2
TL
178 break;
179 }
180 }
181 if (!tmp_trid) {
182 new_trid = calloc(1, sizeof(*new_trid));
183 if (!new_trid) {
9f95a23c
TL
184 pg = g_next_poll_group;
185 SPDK_ERRLOG("Insufficient memory. Assigning to poll group %p\n", pg);
11fdf7f2
TL
186 break;
187 }
9f95a23c
TL
188 /* Get the next available poll group for the new host */
189 pg = spdk_nvmf_get_next_pg();
190 new_trid->pg = pg;
11fdf7f2
TL
191 memcpy(new_trid->host_trid.traddr, trid.traddr,
192 SPDK_NVMF_TRADDR_MAX_LEN + 1);
193 TAILQ_INSERT_TAIL(&g_nvmf_tgt_host_trids, new_trid, link);
194 }
195 break;
196 case CONNECT_SCHED_ROUND_ROBIN:
197 default:
9f95a23c 198 pg = spdk_nvmf_get_next_pg();
11fdf7f2
TL
199 break;
200 }
201
9f95a23c
TL
202 return pg;
203}
204
/* Message payload handed to nvmf_tgt_poll_group_add() on the target thread. */
struct nvmf_tgt_pg_ctx {
	/* Connection to be added */
	struct spdk_nvmf_qpair		*qpair;
	/* Poll group record the connection was assigned to */
	struct nvmf_tgt_poll_group	*pg;
};
209
210static void
211nvmf_tgt_poll_group_add(void *_ctx)
212{
213 struct nvmf_tgt_pg_ctx *ctx = _ctx;
214 struct spdk_nvmf_qpair *qpair = ctx->qpair;
215 struct nvmf_tgt_poll_group *pg = ctx->pg;
216
217 free(_ctx);
218
219 if (spdk_nvmf_poll_group_add(pg->group, qpair) != 0) {
220 SPDK_ERRLOG("Unable to add the qpair to a poll group.\n");
221 spdk_nvmf_qpair_disconnect(qpair, NULL, NULL);
222 }
11fdf7f2
TL
223}
224
225static void
226new_qpair(struct spdk_nvmf_qpair *qpair)
227{
9f95a23c 228 struct nvmf_tgt_pg_ctx *ctx;
11fdf7f2 229 struct nvmf_tgt_poll_group *pg;
11fdf7f2
TL
230 uint32_t attempts;
231
232 if (g_tgt_state != NVMF_TGT_RUNNING) {
233 spdk_nvmf_qpair_disconnect(qpair, NULL, NULL);
234 return;
235 }
236
237 for (attempts = 0; attempts < g_num_poll_groups; attempts++) {
9f95a23c 238 pg = nvmf_tgt_get_pg(qpair);
11fdf7f2
TL
239 if (pg->group != NULL) {
240 break;
241 } else {
242 nvmf_tgt_remove_host_trid(qpair);
243 }
244 }
245
246 if (attempts == g_num_poll_groups) {
247 SPDK_ERRLOG("No poll groups exist.\n");
248 spdk_nvmf_qpair_disconnect(qpair, NULL, NULL);
249 return;
250 }
251
9f95a23c
TL
252 ctx = calloc(1, sizeof(*ctx));
253 if (!ctx) {
254 SPDK_ERRLOG("Unable to send message to poll group.\n");
255 spdk_nvmf_qpair_disconnect(qpair, NULL, NULL);
256 return;
257 }
258
259 ctx->qpair = qpair;
260 ctx->pg = pg;
261
262 spdk_thread_send_msg(pg->thread, nvmf_tgt_poll_group_add, ctx);
11fdf7f2
TL
263}
264
265static int
266acceptor_poll(void *arg)
267{
268 struct spdk_nvmf_tgt *tgt = arg;
269
270 spdk_nvmf_tgt_accept(tgt, new_qpair);
271
272 return -1;
273}
274
275static void
276nvmf_tgt_destroy_poll_group_done(void *ctx)
277{
278 g_tgt_state = NVMF_TGT_FINI_STOP_ACCEPTOR;
9f95a23c 279 assert(g_num_poll_groups == 0);
11fdf7f2
TL
280 nvmf_tgt_advance_state();
281}
282
283static void
284nvmf_tgt_destroy_poll_group(void *ctx)
285{
9f95a23c
TL
286 struct nvmf_tgt_poll_group *pg, *tpg;
287 struct spdk_thread *thread;
11fdf7f2 288
9f95a23c 289 thread = spdk_get_thread();
11fdf7f2 290
9f95a23c
TL
291 TAILQ_FOREACH_SAFE(pg, &g_poll_groups, link, tpg) {
292 if (pg->thread == thread) {
293 TAILQ_REMOVE(&g_poll_groups, pg, link);
294 spdk_nvmf_poll_group_destroy(pg->group);
295 free(pg);
296 assert(g_num_poll_groups > 0);
297 g_num_poll_groups--;
298 return;
299 }
11fdf7f2
TL
300 }
301}
302
303static void
304nvmf_tgt_create_poll_group_done(void *ctx)
305{
306 g_tgt_state = NVMF_TGT_INIT_START_SUBSYSTEMS;
307 nvmf_tgt_advance_state();
308}
309
310static void
311nvmf_tgt_create_poll_group(void *ctx)
312{
313 struct nvmf_tgt_poll_group *pg;
314
9f95a23c
TL
315 pg = calloc(1, sizeof(*pg));
316 if (!pg) {
317 SPDK_ERRLOG("Not enough memory to allocate poll groups\n");
318 spdk_app_stop(-ENOMEM);
319 return;
320 }
11fdf7f2 321
9f95a23c 322 pg->thread = spdk_get_thread();
11fdf7f2 323 pg->group = spdk_nvmf_poll_group_create(g_spdk_nvmf_tgt);
9f95a23c
TL
324 TAILQ_INSERT_TAIL(&g_poll_groups, pg, link);
325 g_num_poll_groups++;
326
327 if (g_next_poll_group == NULL) {
328 g_next_poll_group = pg;
329 }
11fdf7f2
TL
330}
331
332static void
333nvmf_tgt_subsystem_started(struct spdk_nvmf_subsystem *subsystem,
334 void *cb_arg, int status)
335{
336 subsystem = spdk_nvmf_subsystem_get_next(subsystem);
337
338 if (subsystem) {
339 spdk_nvmf_subsystem_start(subsystem, nvmf_tgt_subsystem_started, NULL);
340 return;
341 }
342
343 g_tgt_state = NVMF_TGT_INIT_START_ACCEPTOR;
344 nvmf_tgt_advance_state();
345}
346
347static void
348nvmf_tgt_subsystem_stopped(struct spdk_nvmf_subsystem *subsystem,
349 void *cb_arg, int status)
350{
351 subsystem = spdk_nvmf_subsystem_get_next(subsystem);
352
353 if (subsystem) {
354 spdk_nvmf_subsystem_stop(subsystem, nvmf_tgt_subsystem_stopped, NULL);
355 return;
356 }
357
358 g_tgt_state = NVMF_TGT_FINI_DESTROY_POLL_GROUPS;
359 nvmf_tgt_advance_state();
360}
361
362static void
363nvmf_tgt_destroy_done(void *ctx, int status)
364{
365 struct nvmf_tgt_host_trid *trid, *tmp_trid;
366
367 g_tgt_state = NVMF_TGT_STOPPED;
368
369 TAILQ_FOREACH_SAFE(trid, &g_nvmf_tgt_host_trids, link, tmp_trid) {
370 TAILQ_REMOVE(&g_nvmf_tgt_host_trids, trid, link);
371 free(trid);
372 }
373
374 free(g_spdk_nvmf_tgt_conf);
375 g_spdk_nvmf_tgt_conf = NULL;
376 nvmf_tgt_advance_state();
377}
378
379static void
380nvmf_tgt_parse_conf_done(int status)
381{
382 g_tgt_state = (status == 0) ? NVMF_TGT_INIT_CREATE_POLL_GROUPS : NVMF_TGT_ERROR;
383 nvmf_tgt_advance_state();
384}
385
386static void
387nvmf_tgt_parse_conf_start(void *ctx)
388{
389 if (spdk_nvmf_parse_conf(nvmf_tgt_parse_conf_done)) {
390 SPDK_ERRLOG("spdk_nvmf_parse_conf() failed\n");
391 g_tgt_state = NVMF_TGT_ERROR;
392 nvmf_tgt_advance_state();
393 }
394}
395
396static void
397nvmf_tgt_advance_state(void)
398{
399 enum nvmf_tgt_state prev_state;
400 int rc = -1;
401
402 do {
403 prev_state = g_tgt_state;
404
405 switch (g_tgt_state) {
406 case NVMF_TGT_INIT_NONE: {
407 g_tgt_state = NVMF_TGT_INIT_PARSE_CONFIG;
11fdf7f2
TL
408 break;
409 }
410 case NVMF_TGT_INIT_PARSE_CONFIG:
411 /* Send message to self to call parse conf func.
412 * Prevents it from possibly performing cb before getting
413 * out of this function, which causes problems. */
414 spdk_thread_send_msg(spdk_get_thread(), nvmf_tgt_parse_conf_start, NULL);
415 break;
416 case NVMF_TGT_INIT_CREATE_POLL_GROUPS:
417 /* Send a message to each thread and create a poll group */
418 spdk_for_each_thread(nvmf_tgt_create_poll_group,
419 NULL,
420 nvmf_tgt_create_poll_group_done);
421 break;
422 case NVMF_TGT_INIT_START_SUBSYSTEMS: {
423 struct spdk_nvmf_subsystem *subsystem;
424
425 subsystem = spdk_nvmf_subsystem_get_first(g_spdk_nvmf_tgt);
426
427 if (subsystem) {
428 spdk_nvmf_subsystem_start(subsystem, nvmf_tgt_subsystem_started, NULL);
429 } else {
430 g_tgt_state = NVMF_TGT_INIT_START_ACCEPTOR;
431 }
432 break;
433 }
434 case NVMF_TGT_INIT_START_ACCEPTOR:
435 g_acceptor_poller = spdk_poller_register(acceptor_poll, g_spdk_nvmf_tgt,
436 g_spdk_nvmf_tgt_conf->acceptor_poll_rate);
437 SPDK_INFOLOG(SPDK_LOG_NVMF, "Acceptor running\n");
438 g_tgt_state = NVMF_TGT_RUNNING;
439 break;
440 case NVMF_TGT_RUNNING:
441 spdk_subsystem_init_next(0);
442 break;
443 case NVMF_TGT_FINI_STOP_SUBSYSTEMS: {
444 struct spdk_nvmf_subsystem *subsystem;
445
446 subsystem = spdk_nvmf_subsystem_get_first(g_spdk_nvmf_tgt);
447
448 if (subsystem) {
449 spdk_nvmf_subsystem_stop(subsystem, nvmf_tgt_subsystem_stopped, NULL);
450 } else {
451 g_tgt_state = NVMF_TGT_FINI_DESTROY_POLL_GROUPS;
452 }
453 break;
454 }
455 case NVMF_TGT_FINI_DESTROY_POLL_GROUPS:
456 /* Send a message to each thread and destroy the poll group */
457 spdk_for_each_thread(nvmf_tgt_destroy_poll_group,
458 NULL,
459 nvmf_tgt_destroy_poll_group_done);
460 break;
461 case NVMF_TGT_FINI_STOP_ACCEPTOR:
462 spdk_poller_unregister(&g_acceptor_poller);
463 g_tgt_state = NVMF_TGT_FINI_FREE_RESOURCES;
464 break;
465 case NVMF_TGT_FINI_FREE_RESOURCES:
466 spdk_nvmf_tgt_destroy(g_spdk_nvmf_tgt, nvmf_tgt_destroy_done, NULL);
467 break;
468 case NVMF_TGT_STOPPED:
469 spdk_subsystem_fini_next();
470 return;
471 case NVMF_TGT_ERROR:
472 spdk_subsystem_init_next(rc);
473 return;
474 }
475
476 } while (g_tgt_state != prev_state);
477}
478
479static void
480spdk_nvmf_subsystem_init(void)
481{
482 g_tgt_state = NVMF_TGT_INIT_NONE;
483 nvmf_tgt_advance_state();
484}
485
486static char *
487get_conn_sched_string(enum spdk_nvmf_connect_sched sched)
488{
489 if (sched == CONNECT_SCHED_HOST_IP) {
490 return "hostip";
491 } else {
492 return "roundrobin";
493 }
494}
495
496static void
9f95a23c 497spdk_nvmf_subsystem_write_config_json(struct spdk_json_write_ctx *w)
11fdf7f2
TL
498{
499 spdk_json_write_array_begin(w);
500
501 spdk_json_write_object_begin(w);
502 spdk_json_write_named_string(w, "method", "set_nvmf_target_config");
503
504 spdk_json_write_named_object_begin(w, "params");
505 spdk_json_write_named_uint32(w, "acceptor_poll_rate", g_spdk_nvmf_tgt_conf->acceptor_poll_rate);
506 spdk_json_write_named_string(w, "conn_sched",
507 get_conn_sched_string(g_spdk_nvmf_tgt_conf->conn_sched));
508 spdk_json_write_object_end(w);
509 spdk_json_write_object_end(w);
510
511 spdk_nvmf_tgt_write_config_json(w, g_spdk_nvmf_tgt);
512 spdk_json_write_array_end(w);
11fdf7f2
TL
513}
514
515static struct spdk_subsystem g_spdk_subsystem_nvmf = {
516 .name = "nvmf",
517 .init = spdk_nvmf_subsystem_init,
518 .fini = spdk_nvmf_subsystem_fini,
519 .write_config_json = spdk_nvmf_subsystem_write_config_json,
520};
521
522SPDK_SUBSYSTEM_REGISTER(g_spdk_subsystem_nvmf)
523SPDK_SUBSYSTEM_DEPEND(nvmf, bdev)