2 * CXL Flash Device Driver
4 * Written by: Manoj N. Kumar <manoj@linux.vnet.ibm.com>, IBM Corporation
5 * Matthew R. Ochs <mrochs@linux.vnet.ibm.com>, IBM Corporation
7 * Copyright (C) 2015 IBM Corporation
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version
12 * 2 of the License, or (at your option) any later version.
15 #include <linux/syscalls.h>
17 #include <asm/unaligned.h>
18 #include <asm/bitsperlong.h>
20 #include <scsi/scsi_cmnd.h>
21 #include <scsi/scsi_host.h>
22 #include <uapi/scsi/cxlflash_ioctl.h>
27 #include "superpipe.h"
30 * marshal_virt_to_resize() - translate uvirtual to resize structure
31 * @virt: Source structure from which to translate/copy.
32 * @resize: Destination structure for the translate/copy.
34 static void marshal_virt_to_resize(struct dk_cxlflash_uvirtual
*virt
,
35 struct dk_cxlflash_resize
*resize
)
37 resize
->hdr
= virt
->hdr
;
38 resize
->context_id
= virt
->context_id
;
39 resize
->rsrc_handle
= virt
->rsrc_handle
;
40 resize
->req_size
= virt
->lun_size
;
41 resize
->last_lba
= virt
->last_lba
;
45 * marshal_clone_to_rele() - translate clone to release structure
46 * @clone: Source structure from which to translate/copy.
47 * @rele: Destination structure for the translate/copy.
49 static void marshal_clone_to_rele(struct dk_cxlflash_clone
*clone
,
50 struct dk_cxlflash_release
*release
)
52 release
->hdr
= clone
->hdr
;
53 release
->context_id
= clone
->context_id_dst
;
57 * ba_init() - initializes a block allocator
58 * @ba_lun: Block allocator to initialize.
60 * Return: 0 on success, -errno on failure
62 static int ba_init(struct ba_lun
*ba_lun
)
64 struct ba_lun_info
*bali
= NULL
;
65 int lun_size_au
= 0, i
= 0;
66 int last_word_underflow
= 0;
69 pr_debug("%s: Initializing LUN: lun_id=%016llx "
70 "ba_lun->lsize=%lx ba_lun->au_size=%lX\n",
71 __func__
, ba_lun
->lun_id
, ba_lun
->lsize
, ba_lun
->au_size
);
73 /* Calculate bit map size */
74 lun_size_au
= ba_lun
->lsize
/ ba_lun
->au_size
;
75 if (lun_size_au
== 0) {
76 pr_debug("%s: Requested LUN size of 0!\n", __func__
);
80 /* Allocate lun information container */
81 bali
= kzalloc(sizeof(struct ba_lun_info
), GFP_KERNEL
);
82 if (unlikely(!bali
)) {
83 pr_err("%s: Failed to allocate lun_info lun_id=%016llx\n",
84 __func__
, ba_lun
->lun_id
);
88 bali
->total_aus
= lun_size_au
;
89 bali
->lun_bmap_size
= lun_size_au
/ BITS_PER_LONG
;
91 if (lun_size_au
% BITS_PER_LONG
)
92 bali
->lun_bmap_size
++;
94 /* Allocate bitmap space */
95 bali
->lun_alloc_map
= kzalloc((bali
->lun_bmap_size
* sizeof(u64
)),
97 if (unlikely(!bali
->lun_alloc_map
)) {
98 pr_err("%s: Failed to allocate lun allocation map: "
99 "lun_id=%016llx\n", __func__
, ba_lun
->lun_id
);
104 /* Initialize the bit map size and set all bits to '1' */
105 bali
->free_aun_cnt
= lun_size_au
;
107 for (i
= 0; i
< bali
->lun_bmap_size
; i
++)
108 bali
->lun_alloc_map
[i
] = 0xFFFFFFFFFFFFFFFFULL
;
110 /* If the last word not fully utilized, mark extra bits as allocated */
111 last_word_underflow
= (bali
->lun_bmap_size
* BITS_PER_LONG
);
112 last_word_underflow
-= bali
->free_aun_cnt
;
113 if (last_word_underflow
> 0) {
114 lam
= &bali
->lun_alloc_map
[bali
->lun_bmap_size
- 1];
115 for (i
= (HIBIT
- last_word_underflow
+ 1);
118 clear_bit(i
, (ulong
*)lam
);
121 /* Initialize high elevator index, low/curr already at 0 from kzalloc */
122 bali
->free_high_idx
= bali
->lun_bmap_size
;
124 /* Allocate clone map */
125 bali
->aun_clone_map
= kzalloc((bali
->total_aus
* sizeof(u8
)),
127 if (unlikely(!bali
->aun_clone_map
)) {
128 pr_err("%s: Failed to allocate clone map: lun_id=%016llx\n",
129 __func__
, ba_lun
->lun_id
);
130 kfree(bali
->lun_alloc_map
);
135 /* Pass the allocated LUN info as a handle to the user */
136 ba_lun
->ba_lun_handle
= bali
;
138 pr_debug("%s: Successfully initialized the LUN: "
139 "lun_id=%016llx bitmap size=%x, free_aun_cnt=%llx\n",
140 __func__
, ba_lun
->lun_id
, bali
->lun_bmap_size
,
146 * find_free_range() - locates a free bit within the block allocator
147 * @low: First word in block allocator to start search.
148 * @high: Last word in block allocator to search.
149 * @bali: LUN information structure owning the block allocator to search.
150 * @bit_word: Passes back the word in the block allocator owning the free bit.
152 * Return: The bit position within the passed back word, -1 on failure
154 static int find_free_range(u32 low
,
156 struct ba_lun_info
*bali
, int *bit_word
)
160 ulong
*lam
, num_bits
;
162 for (i
= low
; i
< high
; i
++)
163 if (bali
->lun_alloc_map
[i
] != 0) {
164 lam
= (ulong
*)&bali
->lun_alloc_map
[i
];
165 num_bits
= (sizeof(*lam
) * BITS_PER_BYTE
);
166 bit_pos
= find_first_bit(lam
, num_bits
);
168 pr_devel("%s: Found free bit %llu in LUN "
169 "map entry %016llx at bitmap index = %d\n",
170 __func__
, bit_pos
, bali
->lun_alloc_map
[i
], i
);
173 bali
->free_aun_cnt
--;
174 clear_bit(bit_pos
, lam
);
182 * ba_alloc() - allocates a block from the block allocator
183 * @ba_lun: Block allocator from which to allocate a block.
185 * Return: The allocated block, -1 on failure
187 static u64
ba_alloc(struct ba_lun
*ba_lun
)
191 struct ba_lun_info
*bali
= NULL
;
193 bali
= ba_lun
->ba_lun_handle
;
195 pr_debug("%s: Received block allocation request: "
196 "lun_id=%016llx free_aun_cnt=%llx\n",
197 __func__
, ba_lun
->lun_id
, bali
->free_aun_cnt
);
199 if (bali
->free_aun_cnt
== 0) {
200 pr_debug("%s: No space left on LUN: lun_id=%016llx\n",
201 __func__
, ba_lun
->lun_id
);
205 /* Search to find a free entry, curr->high then low->curr */
206 bit_pos
= find_free_range(bali
->free_curr_idx
,
207 bali
->free_high_idx
, bali
, &bit_word
);
209 bit_pos
= find_free_range(bali
->free_low_idx
,
213 pr_debug("%s: Could not find an allocation unit on LUN:"
214 " lun_id=%016llx\n", __func__
, ba_lun
->lun_id
);
219 /* Update the free_curr_idx */
220 if (bit_pos
== HIBIT
)
221 bali
->free_curr_idx
= bit_word
+ 1;
223 bali
->free_curr_idx
= bit_word
;
225 pr_debug("%s: Allocating AU number=%llx lun_id=%016llx "
226 "free_aun_cnt=%llx\n", __func__
,
227 ((bit_word
* BITS_PER_LONG
) + bit_pos
), ba_lun
->lun_id
,
230 return (u64
) ((bit_word
* BITS_PER_LONG
) + bit_pos
);
234 * validate_alloc() - validates the specified block has been allocated
235 * @bali: LUN info owning the block allocator.
236 * @aun: Block to validate.
238 * Return: 0 on success, -1 on failure
240 static int validate_alloc(struct ba_lun_info
*bali
, u64 aun
)
242 int idx
= 0, bit_pos
= 0;
244 idx
= aun
/ BITS_PER_LONG
;
245 bit_pos
= aun
% BITS_PER_LONG
;
247 if (test_bit(bit_pos
, (ulong
*)&bali
->lun_alloc_map
[idx
]))
254 * ba_free() - frees a block from the block allocator
255 * @ba_lun: Block allocator to which the block is returned.
256 * @to_free: Block to free.
258 * Return: 0 on success, -1 on failure
260 static int ba_free(struct ba_lun
*ba_lun
, u64 to_free
)
262 int idx
= 0, bit_pos
= 0;
263 struct ba_lun_info
*bali
= NULL
;
265 bali
= ba_lun
->ba_lun_handle
;
267 if (validate_alloc(bali
, to_free
)) {
268 pr_debug("%s: AUN %llx is not allocated on lun_id=%016llx\n",
269 __func__
, to_free
, ba_lun
->lun_id
);
273 pr_debug("%s: Received a request to free AU=%llx lun_id=%016llx "
274 "free_aun_cnt=%llx\n", __func__
, to_free
, ba_lun
->lun_id
,
277 if (bali
->aun_clone_map
[to_free
] > 0) {
278 pr_debug("%s: AUN %llx lun_id=%016llx cloned. Clone count=%x\n",
279 __func__
, to_free
, ba_lun
->lun_id
,
280 bali
->aun_clone_map
[to_free
]);
281 bali
->aun_clone_map
[to_free
]--;
285 idx
= to_free
/ BITS_PER_LONG
;
286 bit_pos
= to_free
% BITS_PER_LONG
;
288 set_bit(bit_pos
, (ulong
*)&bali
->lun_alloc_map
[idx
]);
289 bali
->free_aun_cnt
++;
291 if (idx
< bali
->free_low_idx
)
292 bali
->free_low_idx
= idx
;
293 else if (idx
> bali
->free_high_idx
)
294 bali
->free_high_idx
= idx
;
296 pr_debug("%s: Successfully freed AU bit_pos=%x bit map index=%x "
297 "lun_id=%016llx free_aun_cnt=%llx\n", __func__
, bit_pos
, idx
,
298 ba_lun
->lun_id
, bali
->free_aun_cnt
);
304 * ba_clone() - Clone a chunk of the block allocation table
305 * @ba_lun: Block allocator owning the block to clone.
306 * @to_clone: Block to clone.
308 * Return: 0 on success, -1 on failure
310 static int ba_clone(struct ba_lun
*ba_lun
, u64 to_clone
)
312 struct ba_lun_info
*bali
= ba_lun
->ba_lun_handle
;
314 if (validate_alloc(bali
, to_clone
)) {
315 pr_debug("%s: AUN=%llx not allocated on lun_id=%016llx\n",
316 __func__
, to_clone
, ba_lun
->lun_id
);
320 pr_debug("%s: Received a request to clone AUN %llx on lun_id=%016llx\n",
321 __func__
, to_clone
, ba_lun
->lun_id
);
323 if (bali
->aun_clone_map
[to_clone
] == MAX_AUN_CLONE_CNT
) {
324 pr_debug("%s: AUN %llx on lun_id=%016llx hit max clones already\n",
325 __func__
, to_clone
, ba_lun
->lun_id
);
329 bali
->aun_clone_map
[to_clone
]++;
335 * ba_space() - returns the amount of free space left in the block allocator
336 * @ba_lun: Block allocator.
338 * Return: Amount of free space in block allocator
340 static u64
ba_space(struct ba_lun
*ba_lun
)
342 struct ba_lun_info
*bali
= ba_lun
->ba_lun_handle
;
344 return bali
->free_aun_cnt
;
348 * cxlflash_ba_terminate() - frees resources associated with the block allocator
349 * @ba_lun: Block allocator.
351 * Safe to call in a partially allocated state.
353 void cxlflash_ba_terminate(struct ba_lun
*ba_lun
)
355 struct ba_lun_info
*bali
= ba_lun
->ba_lun_handle
;
358 kfree(bali
->aun_clone_map
);
359 kfree(bali
->lun_alloc_map
);
361 ba_lun
->ba_lun_handle
= NULL
;
366 * init_vlun() - initializes a LUN for virtual use
367 * @lli: LUN information structure that owns the block allocator.
369 * Return: 0 on success, -errno on failure
371 static int init_vlun(struct llun_info
*lli
)
374 struct glun_info
*gli
= lli
->parent
;
375 struct blka
*blka
= &gli
->blka
;
377 memset(blka
, 0, sizeof(*blka
));
378 mutex_init(&blka
->mutex
);
380 /* LUN IDs are unique per port, save the index instead */
381 blka
->ba_lun
.lun_id
= lli
->lun_index
;
382 blka
->ba_lun
.lsize
= gli
->max_lba
+ 1;
383 blka
->ba_lun
.lba_size
= gli
->blk_len
;
385 blka
->ba_lun
.au_size
= MC_CHUNK_SIZE
;
386 blka
->nchunk
= blka
->ba_lun
.lsize
/ MC_CHUNK_SIZE
;
388 rc
= ba_init(&blka
->ba_lun
);
390 pr_debug("%s: cannot init block_alloc, rc=%d\n", __func__
, rc
);
392 pr_debug("%s: returning rc=%d lli=%p\n", __func__
, rc
, lli
);
397 * write_same16() - sends a SCSI WRITE_SAME16 (0) command to specified LUN
398 * @sdev: SCSI device associated with LUN.
399 * @lba: Logical block address to start write same.
400 * @nblks: Number of logical blocks to write same.
402 * The SCSI WRITE_SAME16 can take quite a while to complete. Should an EEH occur
403 * while in scsi_execute(), the EEH handler will attempt to recover. As part of
404 * the recovery, the handler drains all currently running ioctls, waiting until
405 * they have completed before proceeding with a reset. As this routine is used
406 * on the ioctl path, this can create a condition where the EEH handler becomes
407 * stuck, infinitely waiting for this ioctl thread. To avoid this behavior,
408 * temporarily unmark this thread as an ioctl thread by releasing the ioctl read
409 * semaphore. This will allow the EEH handler to proceed with a recovery while
410 * this thread is still running. Once the scsi_execute() returns, reacquire the
411 * ioctl read semaphore and check the adapter state in case it changed while
412 * inside of scsi_execute(). The state check will wait if the adapter is still
413 * being recovered or return a failure if the recovery failed. In the event that
414 * the adapter reset failed, simply return the failure as the ioctl would be
415 * unable to continue.
417 * Note that the above puts a requirement on this routine to only be called on
420 * Return: 0 on success, -errno on failure
422 static int write_same16(struct scsi_device
*sdev
,
428 u8
*sense_buf
= NULL
;
431 int ws_limit
= SISLITE_MAX_WS_BLOCKS
;
434 u32 to
= sdev
->request_queue
->rq_timeout
;
435 struct cxlflash_cfg
*cfg
= shost_priv(sdev
->host
);
436 struct device
*dev
= &cfg
->dev
->dev
;
438 cmd_buf
= kzalloc(CMD_BUFSIZE
, GFP_KERNEL
);
439 scsi_cmd
= kzalloc(MAX_COMMAND_SIZE
, GFP_KERNEL
);
440 sense_buf
= kzalloc(SCSI_SENSE_BUFFERSIZE
, GFP_KERNEL
);
441 if (unlikely(!cmd_buf
|| !scsi_cmd
|| !sense_buf
)) {
448 scsi_cmd
[0] = WRITE_SAME_16
;
449 put_unaligned_be64(offset
, &scsi_cmd
[2]);
450 put_unaligned_be32(ws_limit
< left
? ws_limit
: left
,
453 /* Drop the ioctl read semaphore across lengthy call */
454 up_read(&cfg
->ioctl_rwsem
);
455 result
= scsi_execute(sdev
, scsi_cmd
, DMA_TO_DEVICE
, cmd_buf
,
456 CMD_BUFSIZE
, sense_buf
, to
, CMD_RETRIES
,
458 down_read(&cfg
->ioctl_rwsem
);
459 rc
= check_state(cfg
);
461 dev_err(dev
, "%s: Failed state result=%08x\n",
468 dev_err_ratelimited(dev
, "%s: command failed for "
469 "offset=%lld result=%08x\n",
470 __func__
, offset
, result
);
482 dev_dbg(dev
, "%s: returning rc=%d\n", __func__
, rc
);
487 * grow_lxt() - expands the translation table associated with the specified RHTE
488 * @afu: AFU associated with the host.
489 * @sdev: SCSI device associated with LUN.
490 * @ctxid: Context ID of context owning the RHTE.
491 * @rhndl: Resource handle associated with the RHTE.
492 * @rhte: Resource handle entry (RHTE).
493 * @new_size: Number of translation entries associated with RHTE.
495 * By design, this routine employs a 'best attempt' allocation and will
496 * truncate the requested size down if there is not sufficient space in
497 * the block allocator to satisfy the request but there does exist some
498 * amount of space. The user is made aware of this by returning the size
501 * Return: 0 on success, -errno on failure
503 static int grow_lxt(struct afu
*afu
,
504 struct scsi_device
*sdev
,
507 struct sisl_rht_entry
*rhte
,
510 struct cxlflash_cfg
*cfg
= shost_priv(sdev
->host
);
511 struct device
*dev
= &cfg
->dev
->dev
;
512 struct sisl_lxt_entry
*lxt
= NULL
, *lxt_old
= NULL
;
513 struct llun_info
*lli
= sdev
->hostdata
;
514 struct glun_info
*gli
= lli
->parent
;
515 struct blka
*blka
= &gli
->blka
;
517 u32 ngrps
, ngrps_old
;
518 u64 aun
; /* chunk# allocated by block allocator */
519 u64 delta
= *new_size
- rhte
->lxt_cnt
;
524 * Check what is available in the block allocator before re-allocating
525 * LXT array. This is done up front under the mutex which must not be
526 * released until after allocation is complete.
528 mutex_lock(&blka
->mutex
);
529 av_size
= ba_space(&blka
->ba_lun
);
530 if (unlikely(av_size
<= 0)) {
531 dev_dbg(dev
, "%s: ba_space error av_size=%d\n",
533 mutex_unlock(&blka
->mutex
);
541 lxt_old
= rhte
->lxt_start
;
542 ngrps_old
= LXT_NUM_GROUPS(rhte
->lxt_cnt
);
543 ngrps
= LXT_NUM_GROUPS(rhte
->lxt_cnt
+ delta
);
545 if (ngrps
!= ngrps_old
) {
546 /* reallocate to fit new size */
547 lxt
= kzalloc((sizeof(*lxt
) * LXT_GROUP_SIZE
* ngrps
),
549 if (unlikely(!lxt
)) {
550 mutex_unlock(&blka
->mutex
);
555 /* copy over all old entries */
556 memcpy(lxt
, lxt_old
, (sizeof(*lxt
) * rhte
->lxt_cnt
));
560 /* nothing can fail from now on */
561 my_new_size
= rhte
->lxt_cnt
+ delta
;
563 /* add new entries to the end */
564 for (i
= rhte
->lxt_cnt
; i
< my_new_size
; i
++) {
566 * Due to the earlier check of available space, ba_alloc
567 * cannot fail here. If it did due to internal error,
568 * leave a rlba_base of -1u which will likely be an
569 * invalid LUN (too large).
571 aun
= ba_alloc(&blka
->ba_lun
);
572 if ((aun
== -1ULL) || (aun
>= blka
->nchunk
))
573 dev_dbg(dev
, "%s: ba_alloc error allocated chunk=%llu "
574 "max=%llu\n", __func__
, aun
, blka
->nchunk
- 1);
576 /* select both ports, use r/w perms from RHT */
577 lxt
[i
].rlba_base
= ((aun
<< MC_CHUNK_SHIFT
) |
578 (lli
->lun_index
<< LXT_LUNIDX_SHIFT
) |
579 (RHT_PERM_RW
<< LXT_PERM_SHIFT
|
583 mutex_unlock(&blka
->mutex
);
586 * The following sequence is prescribed in the SISlite spec
587 * for syncing up with the AFU when adding LXT entries.
589 dma_wmb(); /* Make LXT updates are visible */
591 rhte
->lxt_start
= lxt
;
592 dma_wmb(); /* Make RHT entry's LXT table update visible */
594 rhte
->lxt_cnt
= my_new_size
;
595 dma_wmb(); /* Make RHT entry's LXT table size update visible */
597 cxlflash_afu_sync(afu
, ctxid
, rhndl
, AFU_LW_SYNC
);
599 /* free old lxt if reallocated */
602 *new_size
= my_new_size
;
604 dev_dbg(dev
, "%s: returning rc=%d\n", __func__
, rc
);
609 * shrink_lxt() - reduces translation table associated with the specified RHTE
610 * @afu: AFU associated with the host.
611 * @sdev: SCSI device associated with LUN.
612 * @rhndl: Resource handle associated with the RHTE.
613 * @rhte: Resource handle entry (RHTE).
614 * @ctxi: Context owning resources.
615 * @new_size: Number of translation entries associated with RHTE.
617 * Return: 0 on success, -errno on failure
619 static int shrink_lxt(struct afu
*afu
,
620 struct scsi_device
*sdev
,
622 struct sisl_rht_entry
*rhte
,
623 struct ctx_info
*ctxi
,
626 struct cxlflash_cfg
*cfg
= shost_priv(sdev
->host
);
627 struct device
*dev
= &cfg
->dev
->dev
;
628 struct sisl_lxt_entry
*lxt
, *lxt_old
;
629 struct llun_info
*lli
= sdev
->hostdata
;
630 struct glun_info
*gli
= lli
->parent
;
631 struct blka
*blka
= &gli
->blka
;
632 ctx_hndl_t ctxid
= DECODE_CTXID(ctxi
->ctxid
);
633 bool needs_ws
= ctxi
->rht_needs_ws
[rhndl
];
634 bool needs_sync
= !ctxi
->err_recovery_active
;
635 u32 ngrps
, ngrps_old
;
636 u64 aun
; /* chunk# allocated by block allocator */
637 u64 delta
= rhte
->lxt_cnt
- *new_size
;
641 lxt_old
= rhte
->lxt_start
;
642 ngrps_old
= LXT_NUM_GROUPS(rhte
->lxt_cnt
);
643 ngrps
= LXT_NUM_GROUPS(rhte
->lxt_cnt
- delta
);
645 if (ngrps
!= ngrps_old
) {
646 /* Reallocate to fit new size unless new size is 0 */
648 lxt
= kzalloc((sizeof(*lxt
) * LXT_GROUP_SIZE
* ngrps
),
650 if (unlikely(!lxt
)) {
655 /* Copy over old entries that will remain */
657 (sizeof(*lxt
) * (rhte
->lxt_cnt
- delta
)));
663 /* Nothing can fail from now on */
664 my_new_size
= rhte
->lxt_cnt
- delta
;
667 * The following sequence is prescribed in the SISlite spec
668 * for syncing up with the AFU when removing LXT entries.
670 rhte
->lxt_cnt
= my_new_size
;
671 dma_wmb(); /* Make RHT entry's LXT table size update visible */
673 rhte
->lxt_start
= lxt
;
674 dma_wmb(); /* Make RHT entry's LXT table update visible */
677 cxlflash_afu_sync(afu
, ctxid
, rhndl
, AFU_HW_SYNC
);
681 * Mark the context as unavailable, so that we can release
684 ctxi
->unavail
= true;
685 mutex_unlock(&ctxi
->mutex
);
688 /* Free LBAs allocated to freed chunks */
689 mutex_lock(&blka
->mutex
);
690 for (i
= delta
- 1; i
>= 0; i
--) {
691 /* Mask the higher 48 bits before shifting, even though
694 aun
= (lxt_old
[my_new_size
+ i
].rlba_base
& SISL_ASTATUS_MASK
);
695 aun
= (aun
>> MC_CHUNK_SHIFT
);
697 write_same16(sdev
, aun
, MC_CHUNK_SIZE
);
698 ba_free(&blka
->ba_lun
, aun
);
700 mutex_unlock(&blka
->mutex
);
703 /* Make the context visible again */
704 mutex_lock(&ctxi
->mutex
);
705 ctxi
->unavail
= false;
708 /* Free old lxt if reallocated */
711 *new_size
= my_new_size
;
713 dev_dbg(dev
, "%s: returning rc=%d\n", __func__
, rc
);
718 * _cxlflash_vlun_resize() - changes the size of a virtual LUN
719 * @sdev: SCSI device associated with LUN owning virtual LUN.
720 * @ctxi: Context owning resources.
721 * @resize: Resize ioctl data structure.
723 * On successful return, the user is informed of the new size (in blocks)
724 * of the virtual LUN in last LBA format. When the size of the virtual
725 * LUN is zero, the last LBA is reflected as -1. See comment in the
726 * prologue for _cxlflash_disk_release() regarding AFU syncs and contexts
727 * on the error recovery list.
729 * Return: 0 on success, -errno on failure
731 int _cxlflash_vlun_resize(struct scsi_device
*sdev
,
732 struct ctx_info
*ctxi
,
733 struct dk_cxlflash_resize
*resize
)
735 struct cxlflash_cfg
*cfg
= shost_priv(sdev
->host
);
736 struct device
*dev
= &cfg
->dev
->dev
;
737 struct llun_info
*lli
= sdev
->hostdata
;
738 struct glun_info
*gli
= lli
->parent
;
739 struct afu
*afu
= cfg
->afu
;
740 bool put_ctx
= false;
742 res_hndl_t rhndl
= resize
->rsrc_handle
;
745 u64 ctxid
= DECODE_CTXID(resize
->context_id
),
746 rctxid
= resize
->context_id
;
748 struct sisl_rht_entry
*rhte
;
753 * The requested size (req_size) is always assumed to be in 4k blocks,
754 * so we have to convert it here from 4k to chunk size.
756 nsectors
= (resize
->req_size
* CXLFLASH_BLOCK_SIZE
) / gli
->blk_len
;
757 new_size
= DIV_ROUND_UP(nsectors
, MC_CHUNK_SIZE
);
759 dev_dbg(dev
, "%s: ctxid=%llu rhndl=%llu req_size=%llu new_size=%llu\n",
760 __func__
, ctxid
, resize
->rsrc_handle
, resize
->req_size
,
763 if (unlikely(gli
->mode
!= MODE_VIRTUAL
)) {
764 dev_dbg(dev
, "%s: LUN mode does not support resize mode=%d\n",
765 __func__
, gli
->mode
);
772 ctxi
= get_context(cfg
, rctxid
, lli
, CTX_CTRL_ERR_FALLBACK
);
773 if (unlikely(!ctxi
)) {
774 dev_dbg(dev
, "%s: Bad context ctxid=%llu\n",
783 rhte
= get_rhte(ctxi
, rhndl
, lli
);
784 if (unlikely(!rhte
)) {
785 dev_dbg(dev
, "%s: Bad resource handle rhndl=%u\n",
791 if (new_size
> rhte
->lxt_cnt
)
792 rc
= grow_lxt(afu
, sdev
, ctxid
, rhndl
, rhte
, &new_size
);
793 else if (new_size
< rhte
->lxt_cnt
)
794 rc
= shrink_lxt(afu
, sdev
, rhndl
, rhte
, ctxi
, &new_size
);
796 resize
->hdr
.return_flags
= 0;
797 resize
->last_lba
= (new_size
* MC_CHUNK_SIZE
* gli
->blk_len
);
798 resize
->last_lba
/= CXLFLASH_BLOCK_SIZE
;
804 dev_dbg(dev
, "%s: resized to %llu returning rc=%d\n",
805 __func__
, resize
->last_lba
, rc
);
809 int cxlflash_vlun_resize(struct scsi_device
*sdev
,
810 struct dk_cxlflash_resize
*resize
)
812 return _cxlflash_vlun_resize(sdev
, NULL
, resize
);
816 * cxlflash_restore_luntable() - Restore LUN table to prior state
817 * @cfg: Internal structure associated with the host.
819 void cxlflash_restore_luntable(struct cxlflash_cfg
*cfg
)
821 struct llun_info
*lli
, *temp
;
824 struct afu
*afu
= cfg
->afu
;
825 struct device
*dev
= &cfg
->dev
->dev
;
826 struct sisl_global_map __iomem
*agm
= &afu
->afu_map
->global
;
828 mutex_lock(&global
.mutex
);
830 list_for_each_entry_safe(lli
, temp
, &cfg
->lluns
, list
) {
834 lind
= lli
->lun_index
;
835 dev_dbg(dev
, "%s: Virtual LUNs on slot %d:\n", __func__
, lind
);
837 for (k
= 0; k
< cfg
->num_fc_ports
; k
++)
838 if (lli
->port_sel
& (1 << k
)) {
839 writeq_be(lli
->lun_id
[k
],
840 &agm
->fc_port
[k
][lind
]);
841 dev_dbg(dev
, "\t%d=%llx\n", k
, lli
->lun_id
[k
]);
845 mutex_unlock(&global
.mutex
);
849 * get_num_ports() - compute number of ports from port selection mask
850 * @psm: Port selection mask.
852 * Return: Population count of port selection mask
854 static inline u8
get_num_ports(u32 psm
)
856 static const u8 bits
[16] = { 0, 1, 1, 2, 1, 2, 2, 3,
857 1, 2, 2, 3, 2, 3, 3, 4 };
859 return bits
[psm
& 0xf];
863 * init_luntable() - write an entry in the LUN table
864 * @cfg: Internal structure associated with the host.
865 * @lli: Per adapter LUN information structure.
867 * On successful return, a LUN table entry is created:
868 * - at the top for LUNs visible on multiple ports.
869 * - at the bottom for LUNs visible only on one port.
871 * Return: 0 on success, -errno on failure
873 static int init_luntable(struct cxlflash_cfg
*cfg
, struct llun_info
*lli
)
880 struct afu
*afu
= cfg
->afu
;
881 struct device
*dev
= &cfg
->dev
->dev
;
882 struct sisl_global_map __iomem
*agm
= &afu
->afu_map
->global
;
884 mutex_lock(&global
.mutex
);
889 nports
= get_num_ports(lli
->port_sel
);
890 if (nports
== 0 || nports
> cfg
->num_fc_ports
) {
891 WARN(1, "Unsupported port configuration nports=%u", nports
);
898 * When LUN is visible from multiple ports, we will put
899 * it in the top half of the LUN table.
901 for (k
= 0; k
< cfg
->num_fc_ports
; k
++) {
902 if (!(lli
->port_sel
& (1 << k
)))
905 if (cfg
->promote_lun_index
== cfg
->last_lun_index
[k
]) {
911 lind
= lli
->lun_index
= cfg
->promote_lun_index
;
912 dev_dbg(dev
, "%s: Virtual LUNs on slot %d:\n", __func__
, lind
);
914 for (k
= 0; k
< cfg
->num_fc_ports
; k
++) {
915 if (!(lli
->port_sel
& (1 << k
)))
918 writeq_be(lli
->lun_id
[k
], &agm
->fc_port
[k
][lind
]);
919 dev_dbg(dev
, "\t%d=%llx\n", k
, lli
->lun_id
[k
]);
922 cfg
->promote_lun_index
++;
925 * When LUN is visible only from one port, we will put
926 * it in the bottom half of the LUN table.
928 chan
= PORTMASK2CHAN(lli
->port_sel
);
929 if (cfg
->promote_lun_index
== cfg
->last_lun_index
[chan
]) {
934 lind
= lli
->lun_index
= cfg
->last_lun_index
[chan
];
935 writeq_be(lli
->lun_id
[chan
], &agm
->fc_port
[chan
][lind
]);
936 cfg
->last_lun_index
[chan
]--;
937 dev_dbg(dev
, "%s: Virtual LUNs on slot %d:\n\t%d=%llx\n",
938 __func__
, lind
, chan
, lli
->lun_id
[chan
]);
941 lli
->in_table
= true;
943 mutex_unlock(&global
.mutex
);
944 dev_dbg(dev
, "%s: returning rc=%d\n", __func__
, rc
);
949 * cxlflash_disk_virtual_open() - open a virtual disk of specified size
950 * @sdev: SCSI device associated with LUN owning virtual LUN.
951 * @arg: UVirtual ioctl data structure.
953 * On successful return, the user is informed of the resource handle
954 * to be used to identify the virtual LUN and the size (in blocks) of
955 * the virtual LUN in last LBA format. When the size of the virtual LUN
956 * is zero, the last LBA is reflected as -1.
958 * Return: 0 on success, -errno on failure
960 int cxlflash_disk_virtual_open(struct scsi_device
*sdev
, void *arg
)
962 struct cxlflash_cfg
*cfg
= shost_priv(sdev
->host
);
963 struct device
*dev
= &cfg
->dev
->dev
;
964 struct llun_info
*lli
= sdev
->hostdata
;
965 struct glun_info
*gli
= lli
->parent
;
967 struct dk_cxlflash_uvirtual
*virt
= (struct dk_cxlflash_uvirtual
*)arg
;
968 struct dk_cxlflash_resize resize
;
970 u64 ctxid
= DECODE_CTXID(virt
->context_id
),
971 rctxid
= virt
->context_id
;
972 u64 lun_size
= virt
->lun_size
;
974 u64 rsrc_handle
= -1;
978 struct ctx_info
*ctxi
= NULL
;
979 struct sisl_rht_entry
*rhte
= NULL
;
981 dev_dbg(dev
, "%s: ctxid=%llu ls=%llu\n", __func__
, ctxid
, lun_size
);
983 /* Setup the LUNs block allocator on first call */
984 mutex_lock(&gli
->mutex
);
985 if (gli
->mode
== MODE_NONE
) {
988 dev_err(dev
, "%s: init_vlun failed rc=%d\n",
995 rc
= cxlflash_lun_attach(gli
, MODE_VIRTUAL
, true);
997 dev_err(dev
, "%s: Failed attach to LUN (VIRTUAL)\n", __func__
);
1000 mutex_unlock(&gli
->mutex
);
1002 rc
= init_luntable(cfg
, lli
);
1004 dev_err(dev
, "%s: init_luntable failed rc=%d\n", __func__
, rc
);
1008 ctxi
= get_context(cfg
, rctxid
, lli
, 0);
1009 if (unlikely(!ctxi
)) {
1010 dev_err(dev
, "%s: Bad context ctxid=%llu\n", __func__
, ctxid
);
1015 rhte
= rhte_checkout(ctxi
, lli
);
1016 if (unlikely(!rhte
)) {
1017 dev_err(dev
, "%s: too many opens ctxid=%llu\n",
1019 rc
= -EMFILE
; /* too many opens */
1023 rsrc_handle
= (rhte
- ctxi
->rht_start
);
1025 /* Populate RHT format 0 */
1026 rhte
->nmask
= MC_RHT_NMASK
;
1027 rhte
->fp
= SISL_RHT_FP(0U, ctxi
->rht_perms
);
1029 /* Resize even if requested size is 0 */
1030 marshal_virt_to_resize(virt
, &resize
);
1031 resize
.rsrc_handle
= rsrc_handle
;
1032 rc
= _cxlflash_vlun_resize(sdev
, ctxi
, &resize
);
1034 dev_err(dev
, "%s: resize failed rc=%d\n", __func__
, rc
);
1037 last_lba
= resize
.last_lba
;
1039 if (virt
->hdr
.flags
& DK_CXLFLASH_UVIRTUAL_NEED_WRITE_SAME
)
1040 ctxi
->rht_needs_ws
[rsrc_handle
] = true;
1042 virt
->hdr
.return_flags
= 0;
1043 virt
->last_lba
= last_lba
;
1044 virt
->rsrc_handle
= rsrc_handle
;
1046 if (get_num_ports(lli
->port_sel
) > 1)
1047 virt
->hdr
.return_flags
|= DK_CXLFLASH_ALL_PORTS_ACTIVE
;
1051 dev_dbg(dev
, "%s: returning handle=%llu rc=%d llba=%llu\n",
1052 __func__
, rsrc_handle
, rc
, last_lba
);
1056 rhte_checkin(ctxi
, rhte
);
1058 cxlflash_lun_detach(gli
);
1061 /* Special common cleanup prior to successful LUN attach */
1062 cxlflash_ba_terminate(&gli
->blka
.ba_lun
);
1063 mutex_unlock(&gli
->mutex
);
1068 * clone_lxt() - copies translation tables from source to destination RHTE
1069 * @afu: AFU associated with the host.
1070 * @blka: Block allocator associated with LUN.
1071 * @ctxid: Context ID of context owning the RHTE.
1072 * @rhndl: Resource handle associated with the RHTE.
1073 * @rhte: Destination resource handle entry (RHTE).
1074 * @rhte_src: Source resource handle entry (RHTE).
1076 * Return: 0 on success, -errno on failure
1078 static int clone_lxt(struct afu
*afu
,
1082 struct sisl_rht_entry
*rhte
,
1083 struct sisl_rht_entry
*rhte_src
)
1085 struct cxlflash_cfg
*cfg
= afu
->parent
;
1086 struct device
*dev
= &cfg
->dev
->dev
;
1087 struct sisl_lxt_entry
*lxt
;
1089 u64 aun
; /* chunk# allocated by block allocator */
1092 ngrps
= LXT_NUM_GROUPS(rhte_src
->lxt_cnt
);
1095 /* allocate new LXTs for clone */
1096 lxt
= kzalloc((sizeof(*lxt
) * LXT_GROUP_SIZE
* ngrps
),
1102 memcpy(lxt
, rhte_src
->lxt_start
,
1103 (sizeof(*lxt
) * rhte_src
->lxt_cnt
));
1105 /* clone the LBAs in block allocator via ref_cnt */
1106 mutex_lock(&blka
->mutex
);
1107 for (i
= 0; i
< rhte_src
->lxt_cnt
; i
++) {
1108 aun
= (lxt
[i
].rlba_base
>> MC_CHUNK_SHIFT
);
1109 if (ba_clone(&blka
->ba_lun
, aun
) == -1ULL) {
1110 /* free the clones already made */
1111 for (j
= 0; j
< i
; j
++) {
1112 aun
= (lxt
[j
].rlba_base
>>
1114 ba_free(&blka
->ba_lun
, aun
);
1117 mutex_unlock(&blka
->mutex
);
1122 mutex_unlock(&blka
->mutex
);
1128 * The following sequence is prescribed in the SISlite spec
1129 * for syncing up with the AFU when adding LXT entries.
1131 dma_wmb(); /* Make LXT updates are visible */
1133 rhte
->lxt_start
= lxt
;
1134 dma_wmb(); /* Make RHT entry's LXT table update visible */
1136 rhte
->lxt_cnt
= rhte_src
->lxt_cnt
;
1137 dma_wmb(); /* Make RHT entry's LXT table size update visible */
1139 cxlflash_afu_sync(afu
, ctxid
, rhndl
, AFU_LW_SYNC
);
1141 dev_dbg(dev
, "%s: returning\n", __func__
);
1146 * cxlflash_disk_clone() - clone a context by making snapshot of another
1147 * @sdev: SCSI device associated with LUN owning virtual LUN.
1148 * @clone: Clone ioctl data structure.
1150 * This routine effectively performs cxlflash_disk_open operation for each
1151 * in-use virtual resource in the source context. Note that the destination
1152 * context must be in pristine state and cannot have any resource handles
1153 * open at the time of the clone.
1155 * Return: 0 on success, -errno on failure
1157 int cxlflash_disk_clone(struct scsi_device
*sdev
,
1158 struct dk_cxlflash_clone
*clone
)
1160 struct cxlflash_cfg
*cfg
= shost_priv(sdev
->host
);
1161 struct device
*dev
= &cfg
->dev
->dev
;
1162 struct llun_info
*lli
= sdev
->hostdata
;
1163 struct glun_info
*gli
= lli
->parent
;
1164 struct blka
*blka
= &gli
->blka
;
1165 struct afu
*afu
= cfg
->afu
;
1166 struct dk_cxlflash_release release
= { { 0 }, 0 };
1168 struct ctx_info
*ctxi_src
= NULL
,
1170 struct lun_access
*lun_access_src
, *lun_access_dst
;
1172 u64 ctxid_src
= DECODE_CTXID(clone
->context_id_src
),
1173 ctxid_dst
= DECODE_CTXID(clone
->context_id_dst
),
1174 rctxid_src
= clone
->context_id_src
,
1175 rctxid_dst
= clone
->context_id_dst
;
1181 dev_dbg(dev
, "%s: ctxid_src=%llu ctxid_dst=%llu\n",
1182 __func__
, ctxid_src
, ctxid_dst
);
1184 /* Do not clone yourself */
1185 if (unlikely(rctxid_src
== rctxid_dst
)) {
1190 if (unlikely(gli
->mode
!= MODE_VIRTUAL
)) {
1192 dev_dbg(dev
, "%s: Only supported on virtual LUNs mode=%u\n",
1193 __func__
, gli
->mode
);
1197 ctxi_src
= get_context(cfg
, rctxid_src
, lli
, CTX_CTRL_CLONE
);
1198 ctxi_dst
= get_context(cfg
, rctxid_dst
, lli
, 0);
1199 if (unlikely(!ctxi_src
|| !ctxi_dst
)) {
1200 dev_dbg(dev
, "%s: Bad context ctxid_src=%llu ctxid_dst=%llu\n",
1201 __func__
, ctxid_src
, ctxid_dst
);
1206 /* Verify there is no open resource handle in the destination context */
1207 for (i
= 0; i
< MAX_RHT_PER_CONTEXT
; i
++)
1208 if (ctxi_dst
->rht_start
[i
].nmask
!= 0) {
1213 /* Clone LUN access list */
1214 list_for_each_entry(lun_access_src
, &ctxi_src
->luns
, list
) {
1216 list_for_each_entry(lun_access_dst
, &ctxi_dst
->luns
, list
)
1217 if (lun_access_dst
->sdev
== lun_access_src
->sdev
) {
1223 lun_access_dst
= kzalloc(sizeof(*lun_access_dst
),
1225 if (unlikely(!lun_access_dst
)) {
1226 dev_err(dev
, "%s: lun_access allocation fail\n",
1232 *lun_access_dst
= *lun_access_src
;
1233 list_add(&lun_access_dst
->list
, &sidecar
);
1237 if (unlikely(!ctxi_src
->rht_out
)) {
1238 dev_dbg(dev
, "%s: Nothing to clone\n", __func__
);
1242 /* User specified permission on attach */
1243 perms
= ctxi_dst
->rht_perms
;
1246 * Copy over checked-out RHT (and their associated LXT) entries by
1247 * hand, stopping after we've copied all outstanding entries and
1248 * cleaning up if the clone fails.
1250 * Note: This loop is equivalent to performing cxlflash_disk_open and
1251 * cxlflash_vlun_resize. As such, LUN accounting needs to be taken into
1252 * account by attaching after each successful RHT entry clone. In the
1253 * event that a clone failure is experienced, the LUN detach is handled
1254 * via the cleanup performed by _cxlflash_disk_release.
1256 for (i
= 0; i
< MAX_RHT_PER_CONTEXT
; i
++) {
1257 if (ctxi_src
->rht_out
== ctxi_dst
->rht_out
)
1259 if (ctxi_src
->rht_start
[i
].nmask
== 0)
1262 /* Consume a destination RHT entry */
1263 ctxi_dst
->rht_out
++;
1264 ctxi_dst
->rht_start
[i
].nmask
= ctxi_src
->rht_start
[i
].nmask
;
1265 ctxi_dst
->rht_start
[i
].fp
=
1266 SISL_RHT_FP_CLONE(ctxi_src
->rht_start
[i
].fp
, perms
);
1267 ctxi_dst
->rht_lun
[i
] = ctxi_src
->rht_lun
[i
];
1269 rc
= clone_lxt(afu
, blka
, ctxid_dst
, i
,
1270 &ctxi_dst
->rht_start
[i
],
1271 &ctxi_src
->rht_start
[i
]);
1273 marshal_clone_to_rele(clone
, &release
);
1274 for (j
= 0; j
< i
; j
++) {
1275 release
.rsrc_handle
= j
;
1276 _cxlflash_disk_release(sdev
, ctxi_dst
,
1280 /* Put back the one we failed on */
1281 rhte_checkin(ctxi_dst
, &ctxi_dst
->rht_start
[i
]);
1285 cxlflash_lun_attach(gli
, gli
->mode
, false);
1289 list_splice(&sidecar
, &ctxi_dst
->luns
);
1294 put_context(ctxi_src
);
1296 put_context(ctxi_dst
);
1297 dev_dbg(dev
, "%s: returning rc=%d\n", __func__
, rc
);
1301 list_for_each_entry_safe(lun_access_src
, lun_access_dst
, &sidecar
, list
)
1302 kfree(lun_access_src
);