/*
 * Block driver for s390 storage class memory.
 *
 * Copyright IBM Corp. 2012
 * Author(s): Sebastian Ott <sebott@linux.vnet.ibm.com>
 */

#define KMSG_COMPONENT "scm_block"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/genhd.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <asm/eadm.h>
#include "scm_blk.h"

debug_info_t *scm_debug;
static int scm_major;
static mempool_t *aidaw_pool;
static DEFINE_SPINLOCK(list_lock);
static LIST_HEAD(inactive_requests);
static unsigned int nr_requests = 64;
static unsigned int nr_requests_per_io = 8;
static atomic_t nr_devices = ATOMIC_INIT(0);
module_param(nr_requests, uint, S_IRUGO);
MODULE_PARM_DESC(nr_requests, "Number of parallel requests.");

module_param(nr_requests_per_io, uint, S_IRUGO);
MODULE_PARM_DESC(nr_requests_per_io, "Number of requests per IO.");

MODULE_DESCRIPTION("Block driver for s390 storage class memory.");
MODULE_LICENSE("GPL");
MODULE_ALIAS("scm:scmdev*");
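
/*
 * Request bookkeeping: a fixed set of scm_request structures is
 * preallocated at module load and parked on inactive_requests. The I/O
 * path only moves entries on and off that list under list_lock; the sole
 * allocation it may do is a GFP_ATOMIC page from aidaw_pool.
 */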
static void __scm_free_rq(struct scm_request *scmrq)
{
	struct aob_rq_header *aobrq = to_aobrq(scmrq);

	free_page((unsigned long) scmrq->aob);
	kfree(scmrq->request);
	kfree(aobrq);
}

static void scm_free_rqs(void)
{
	struct list_head *iter, *safe;
	struct scm_request *scmrq;

	spin_lock_irq(&list_lock);
	list_for_each_safe(iter, safe, &inactive_requests) {
		scmrq = list_entry(iter, struct scm_request, list);
		list_del(&scmrq->list);
		__scm_free_rq(scmrq);
	}
	spin_unlock_irq(&list_lock);

	mempool_destroy(aidaw_pool);
}
static int __scm_alloc_rq(void)
{
	struct aob_rq_header *aobrq;
	struct scm_request *scmrq;

	aobrq = kzalloc(sizeof(*aobrq) + sizeof(*scmrq), GFP_KERNEL);
	if (!aobrq)
		return -ENOMEM;

	scmrq = (void *) aobrq->data;
	scmrq->aob = (void *) get_zeroed_page(GFP_DMA);
	if (!scmrq->aob)
		goto free;

	scmrq->request = kcalloc(nr_requests_per_io, sizeof(scmrq->request[0]),
				 GFP_KERNEL);
	if (!scmrq->request)
		goto free;

	INIT_LIST_HEAD(&scmrq->list);
	spin_lock_irq(&list_lock);
	list_add(&scmrq->list, &inactive_requests);
	spin_unlock_irq(&list_lock);

	return 0;
free:
	__scm_free_rq(scmrq);
	return -ENOMEM;
}

static int scm_alloc_rqs(unsigned int nrqs)
{
	int ret = 0;

	aidaw_pool = mempool_create_page_pool(max(nrqs/8, 1U), 0);
	if (!aidaw_pool)
		return -ENOMEM;

	while (nrqs-- && !ret)
		ret = __scm_alloc_rq();

	return ret;
}
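
/*
 * Take a preallocated request off the inactive list. A NULL return means
 * all nr_requests of them are currently in flight.
 */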
static struct scm_request *scm_request_fetch(void)
{
	struct scm_request *scmrq = NULL;

	spin_lock_irq(&list_lock);
	if (list_empty(&inactive_requests))
		goto out;
	scmrq = list_first_entry(&inactive_requests, struct scm_request, list);
	list_del(&scmrq->list);
out:
	spin_unlock_irq(&list_lock);
	return scmrq;
}

static void scm_request_done(struct scm_request *scmrq)
{
	unsigned long flags;
	struct msb *msb;
	u64 aidaw;
	int i;

	for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++) {
		msb = &scmrq->aob->msb[i];
		aidaw = msb->data_addr;

		if ((msb->flags & MSB_FLAG_IDA) && aidaw &&
		    IS_ALIGNED(aidaw, PAGE_SIZE))
			mempool_free(virt_to_page(aidaw), aidaw_pool);
	}

	spin_lock_irqsave(&list_lock, flags);
	list_add(&scmrq->list, &inactive_requests);
	spin_unlock_irqrestore(&list_lock, flags);
}
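
/* Refuse new writes while the device is write-prohibited; reads always pass. */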
static bool scm_permit_request(struct scm_blk_dev *bdev, struct request *req)
{
	return rq_data_dir(req) != WRITE || bdev->state != SCM_WR_PROHIBIT;
}

static inline struct aidaw *scm_aidaw_alloc(void)
{
	struct page *page = mempool_alloc(aidaw_pool, GFP_ATOMIC);

	return page ? page_address(page) : NULL;
}
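
/*
 * Number of data bytes the aidaw entries from 'aidaw' to the end of its
 * page can still describe: every aidaw maps one 4K page of payload. For
 * example (assuming the usual 16-byte struct aidaw and 4K pages), an
 * aidaw at page offset 0xf80 leaves 0x80 bytes = 8 entries = 32K.
 */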
static inline unsigned long scm_aidaw_bytes(struct aidaw *aidaw)
{
	unsigned long _aidaw = (unsigned long) aidaw;
	unsigned long bytes = ALIGN(_aidaw, PAGE_SIZE) - _aidaw;

	return (bytes / sizeof(*aidaw)) * PAGE_SIZE;
}

struct aidaw *scm_aidaw_fetch(struct scm_request *scmrq, unsigned int bytes)
{
	struct aidaw *aidaw;

	if (scm_aidaw_bytes(scmrq->next_aidaw) >= bytes)
		return scmrq->next_aidaw;

	aidaw = scm_aidaw_alloc();
	if (aidaw)
		memset(aidaw, 0, PAGE_SIZE);
	return aidaw;
}
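
/*
 * Fill in the next free msb of the AOB for one block layer request:
 * scm_addr is the byte offset into the SCM increment (blk_rq_pos()
 * counts 512-byte sectors, hence << 9), while block size and blk_count
 * are in 4K units. Data is always described indirectly via aidaws.
 */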
static int scm_request_prepare(struct scm_request *scmrq)
{
	struct scm_blk_dev *bdev = scmrq->bdev;
	struct scm_device *scmdev = bdev->gendisk->private_data;
	int pos = scmrq->aob->request.msb_count;
	struct msb *msb = &scmrq->aob->msb[pos];
	struct request *req = scmrq->request[pos];
	struct req_iterator iter;
	struct aidaw *aidaw;
	struct bio_vec bv;

	aidaw = scm_aidaw_fetch(scmrq, blk_rq_bytes(req));
	if (!aidaw)
		return -ENOMEM;

	msb->bs = MSB_BS_4K;
	scmrq->aob->request.msb_count++;
	msb->scm_addr = scmdev->address + ((u64) blk_rq_pos(req) << 9);
	msb->oc = (rq_data_dir(req) == READ) ? MSB_OC_READ : MSB_OC_WRITE;
	msb->flags |= MSB_FLAG_IDA;
	msb->data_addr = (u64) aidaw;

	rq_for_each_segment(bv, req, iter) {
		WARN_ON(bv.bv_offset);
		msb->blk_count += bv.bv_len >> 12;
		aidaw->data_addr = (u64) page_address(bv.bv_page);
		aidaw++;
	}

	scmrq->next_aidaw = aidaw;
	return 0;
}

static inline void scm_request_set(struct scm_request *scmrq,
				   struct request *req)
{
	scmrq->request[scmrq->aob->request.msb_count] = req;
}

static inline void scm_request_init(struct scm_blk_dev *bdev,
				    struct scm_request *scmrq)
{
	struct aob_rq_header *aobrq = to_aobrq(scmrq);
	struct aob *aob = scmrq->aob;

	memset(scmrq->request, 0,
	       nr_requests_per_io * sizeof(scmrq->request[0]));
	memset(aob, 0, sizeof(*aob));
	aobrq->scmdev = bdev->scmdev;
	aob->request.cmd_code = ARQB_CMD_MOVE;
	aob->request.data = (u64) aobrq;
	scmrq->bdev = bdev;
	scmrq->retries = 4;
	scmrq->error = BLK_STS_OK;
	/* We don't use all msbs - place aidaws at the end of the aob page. */
	scmrq->next_aidaw = (void *) &aob->msb[nr_requests_per_io];
}
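
/* Hand all requests of this scmrq back to blk-mq and recycle the scmrq. */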
static void scm_request_requeue(struct scm_request *scmrq)
{
	struct scm_blk_dev *bdev = scmrq->bdev;
	int i;

	for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++)
		blk_mq_requeue_request(scmrq->request[i], false);

	atomic_dec(&bdev->queued_reqs);
	scm_request_done(scmrq);
	blk_mq_kick_requeue_list(bdev->rq);
}

static void scm_request_finish(struct scm_request *scmrq)
{
	struct scm_blk_dev *bdev = scmrq->bdev;
	blk_status_t *error;
	int i;

	for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++) {
		error = blk_mq_rq_to_pdu(scmrq->request[i]);
		*error = scmrq->error;
		blk_mq_complete_request(scmrq->request[i]);
	}

	atomic_dec(&bdev->queued_reqs);
	scm_request_done(scmrq);
}

static void scm_request_start(struct scm_request *scmrq)
{
	struct scm_blk_dev *bdev = scmrq->bdev;

	atomic_inc(&bdev->queued_reqs);
	if (eadm_start_aob(scmrq->aob)) {
		SCM_LOG(5, "no subchannel");
		scm_request_requeue(scmrq);
	}
}

struct scm_queue {
	struct scm_request *scmrq;
	spinlock_t lock;
};
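
/*
 * Build requests: per hardware queue, consecutive block layer requests
 * are gathered into one AOB (up to nr_requests_per_io msbs) and only
 * started once blk-mq signals the last request of the batch (qd->last)
 * or the AOB is full. sq->scmrq holds the partially filled request
 * between calls, protected by sq->lock.
 */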
static blk_status_t scm_blk_request(struct blk_mq_hw_ctx *hctx,
				    const struct blk_mq_queue_data *qd)
{
	struct scm_device *scmdev = hctx->queue->queuedata;
	struct scm_blk_dev *bdev = dev_get_drvdata(&scmdev->dev);
	struct scm_queue *sq = hctx->driver_data;
	struct request *req = qd->rq;
	struct scm_request *scmrq;

	spin_lock(&sq->lock);
	if (!scm_permit_request(bdev, req)) {
		spin_unlock(&sq->lock);
		return BLK_STS_RESOURCE;
	}

	scmrq = sq->scmrq;
	if (!scmrq) {
		scmrq = scm_request_fetch();
		if (!scmrq) {
			SCM_LOG(5, "no request");
			spin_unlock(&sq->lock);
			return BLK_STS_RESOURCE;
		}
		scm_request_init(bdev, scmrq);
		sq->scmrq = scmrq;
	}
	scm_request_set(scmrq, req);

	if (scm_request_prepare(scmrq)) {
		SCM_LOG(5, "aidaw alloc failed");
		scm_request_set(scmrq, NULL);

		if (scmrq->aob->request.msb_count)
			scm_request_start(scmrq);

		sq->scmrq = NULL;
		spin_unlock(&sq->lock);
		return BLK_STS_RESOURCE;
	}
	blk_mq_start_request(req);

	if (qd->last || scmrq->aob->request.msb_count == nr_requests_per_io) {
		scm_request_start(scmrq);
		sq->scmrq = NULL;
	}
	spin_unlock(&sq->lock);
	return BLK_STS_OK;
}

static int scm_blk_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
			     unsigned int idx)
{
	struct scm_queue *qd = kzalloc(sizeof(*qd), GFP_KERNEL);

	if (!qd)
		return -ENOMEM;

	spin_lock_init(&qd->lock);
	hctx->driver_data = qd;

	return 0;
}

static void scm_blk_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int idx)
{
	struct scm_queue *qd = hctx->driver_data;

	WARN_ON(qd->scmrq);
	kfree(hctx->driver_data);
	hctx->driver_data = NULL;
}

static void __scmrq_log_error(struct scm_request *scmrq)
{
	struct aob *aob = scmrq->aob;

	if (scmrq->error == BLK_STS_TIMEOUT)
		SCM_LOG(1, "Request timeout");
	else {
		SCM_LOG(1, "Request error");
		SCM_LOG_HEX(1, &aob->response, sizeof(aob->response));
	}
	if (scmrq->retries)
		SCM_LOG(1, "Retry request");
	else
		pr_err("An I/O operation to SCM failed with rc=%d\n",
		       scmrq->error);
}
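
/*
 * Retry handling: anything but an I/O error is simply restarted. For an
 * I/O error the response block is inspected; a write-prohibit condition
 * flips the device state (so scm_permit_request() starts failing writes)
 * and the request is requeued rather than restarted.
 */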
static void scm_blk_handle_error(struct scm_request *scmrq)
{
	struct scm_blk_dev *bdev = scmrq->bdev;
	unsigned long flags;

	if (scmrq->error != BLK_STS_IOERR)
		goto restart;

	/* For -EIO the response block is valid. */
	switch (scmrq->aob->response.eqc) {
	case EQC_WR_PROHIBIT:
		spin_lock_irqsave(&bdev->lock, flags);
		if (bdev->state != SCM_WR_PROHIBIT)
			pr_info("%lx: Write access to the SCM increment is suspended\n",
				(unsigned long) bdev->scmdev->address);
		bdev->state = SCM_WR_PROHIBIT;
		spin_unlock_irqrestore(&bdev->lock, flags);
		goto requeue;
	default:
		break;
	}

restart:
	if (!eadm_start_aob(scmrq->aob))
		return;

requeue:
	scm_request_requeue(scmrq);
}
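
/*
 * Interrupt handler, called by the eadm layer when an AOB completes.
 * On error the request is retried up to its retry budget (set in
 * scm_request_init()) before the whole batch is finished with the error.
 */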
void scm_blk_irq(struct scm_device *scmdev, void *data, blk_status_t error)
{
	struct scm_request *scmrq = data;

	scmrq->error = error;
	if (error) {
		__scmrq_log_error(scmrq);
		if (scmrq->retries-- > 0) {
			scm_blk_handle_error(scmrq);
			return;
		}
	}

	scm_request_finish(scmrq);
}

static void scm_blk_request_done(struct request *req)
{
	blk_status_t *error = blk_mq_rq_to_pdu(req);

	blk_mq_end_request(req, *error);
}

static const struct block_device_operations scm_blk_devops = {
	.owner = THIS_MODULE,
};

static const struct blk_mq_ops scm_mq_ops = {
	.queue_rq = scm_blk_request,
	.complete = scm_blk_request_done,
	.init_hctx = scm_blk_init_hctx,
	.exit_hctx = scm_blk_exit_hctx,
};
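
/*
 * Disk names are scma..scmz followed by scmaa..scmzz, i.e.
 * 26 + 26 * 26 = 702 possible names, hence the devindex > 701 check
 * below.
 */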
int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev)
{
	unsigned int devindex, nr_max_blk;
	struct request_queue *rq;
	int len, ret;

	devindex = atomic_inc_return(&nr_devices) - 1;
	/* scma..scmz + scmaa..scmzz */
	if (devindex > 701) {
		ret = -ENODEV;
		goto out;
	}

	bdev->scmdev = scmdev;
	bdev->state = SCM_OPER;
	spin_lock_init(&bdev->lock);
	atomic_set(&bdev->queued_reqs, 0);

	bdev->tag_set.ops = &scm_mq_ops;
	bdev->tag_set.cmd_size = sizeof(blk_status_t);
	bdev->tag_set.nr_hw_queues = nr_requests;
	bdev->tag_set.queue_depth = nr_requests_per_io * nr_requests;
	bdev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;

	ret = blk_mq_alloc_tag_set(&bdev->tag_set);
	if (ret)
		goto out;

	rq = blk_mq_init_queue(&bdev->tag_set);
	if (IS_ERR(rq)) {
		ret = PTR_ERR(rq);
		goto out_tag;
	}
	bdev->rq = rq;
	nr_max_blk = min(scmdev->nr_max_block,
			 (unsigned int) (PAGE_SIZE / sizeof(struct aidaw)));

	blk_queue_logical_block_size(rq, 1 << 12);
	blk_queue_max_hw_sectors(rq, nr_max_blk << 3); /* 8 * 512 = blk_size */
	blk_queue_max_segments(rq, nr_max_blk);
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, rq);
	queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, rq);

	bdev->gendisk = alloc_disk(SCM_NR_PARTS);
	if (!bdev->gendisk) {
		ret = -ENOMEM;
		goto out_queue;
	}
	rq->queuedata = scmdev;
	bdev->gendisk->private_data = scmdev;
	bdev->gendisk->fops = &scm_blk_devops;
	bdev->gendisk->queue = rq;
	bdev->gendisk->major = scm_major;
	bdev->gendisk->first_minor = devindex * SCM_NR_PARTS;

	len = snprintf(bdev->gendisk->disk_name, DISK_NAME_LEN, "scm");
	if (devindex > 25) {
		len += snprintf(bdev->gendisk->disk_name + len,
				DISK_NAME_LEN - len, "%c",
				'a' + (devindex / 26) - 1);
		devindex = devindex % 26;
	}
	snprintf(bdev->gendisk->disk_name + len, DISK_NAME_LEN - len, "%c",
		 'a' + devindex);

	/* 512 byte sectors */
	set_capacity(bdev->gendisk, scmdev->size >> 9);
	device_add_disk(&scmdev->dev, bdev->gendisk);
	return 0;

out_queue:
	blk_cleanup_queue(rq);
out_tag:
	blk_mq_free_tag_set(&bdev->tag_set);
out:
	atomic_dec(&nr_devices);
	return ret;
}

void scm_blk_dev_cleanup(struct scm_blk_dev *bdev)
{
	del_gendisk(bdev->gendisk);
	blk_cleanup_queue(bdev->gendisk->queue);
	blk_mq_free_tag_set(&bdev->tag_set);
	put_disk(bdev->gendisk);
}

void scm_blk_set_available(struct scm_blk_dev *bdev)
{
	unsigned long flags;

	spin_lock_irqsave(&bdev->lock, flags);
	if (bdev->state == SCM_WR_PROHIBIT)
		pr_info("%lx: Write access to the SCM increment is restored\n",
			(unsigned long) bdev->scmdev->address);
	bdev->state = SCM_OPER;
	spin_unlock_irqrestore(&bdev->lock, flags);
}
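
/*
 * Validate module parameters: nr_requests_per_io must be nonzero and is
 * capped at 64, presumably so that the msbs of one I/O leave enough of
 * the AOB page for the in-page aidaws placed behind them.
 */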
static bool __init scm_blk_params_valid(void)
{
	if (!nr_requests_per_io || nr_requests_per_io > 64)
		return false;

	return true;
}

static int __init scm_blk_init(void)
{
	int ret = -EINVAL;

	if (!scm_blk_params_valid())
		goto out;

	ret = register_blkdev(0, "scm");
	if (ret < 0)
		goto out;

	scm_major = ret;
	ret = scm_alloc_rqs(nr_requests);
	if (ret)
		goto out_free;

	scm_debug = debug_register("scm_log", 16, 1, 16);
	if (!scm_debug) {
		ret = -ENOMEM;
		goto out_free;
	}

	debug_register_view(scm_debug, &debug_hex_ascii_view);
	debug_set_level(scm_debug, 2);

	ret = scm_drv_init();
	if (ret)
		goto out_dbf;

	return ret;

out_dbf:
	debug_unregister(scm_debug);
out_free:
	scm_free_rqs();
	unregister_blkdev(scm_major, "scm");
out:
	return ret;
}
module_init(scm_blk_init);

static void __exit scm_blk_cleanup(void)
{
	scm_drv_cleanup();
	debug_unregister(scm_debug);
	scm_free_rqs();
	unregister_blkdev(scm_major, "scm");
}
module_exit(scm_blk_cleanup);