/*
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * Copyright (c) Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * BSD LICENSE
 *
 * Copyright (c) Intel Corporation.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * PCIe DMA Perf Linux driver
 */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/wait.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/time.h>
#include <linux/timer.h>
#include <linux/dma-mapping.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/debugfs.h>
#include <linux/dmaengine.h>
#include <linux/delay.h>
#include <linux/printk.h>
#include <linux/nodemask.h>
#define DRIVER_NAME		"dma_perf"
#define DRIVER_DESCRIPTION	"PCIe DMA Performance Measurement Tool"

#define DRIVER_LICENSE		"Dual BSD/GPL"
#define DRIVER_VERSION		"1.0"
#define DRIVER_AUTHOR		"Dave Jiang <dave.jiang@intel.com>"

#define MAX_THREADS		32
#define MAX_TEST_SIZE		(1024 * 1024) /* 1M */
#define DMA_CHANNELS_PER_NODE	8
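
/*
 * Usage sketch (illustrative, not from the original source): assuming
 * debugfs is mounted at /sys/kernel/debug and KBUILD_MODNAME expands
 * to "dma_perf", the control files created in perf_debugfs_setup()
 * below are driven from the shell like so:
 *
 *   insmod dma_perf.ko
 *   echo 4  > /sys/kernel/debug/dma_perf/dmaperf/threads
 *   echo 16 > /sys/kernel/debug/dma_perf/dmaperf/transfer_size_order
 *   echo 32 > /sys/kernel/debug/dma_perf/dmaperf/total_size_order
 *   echo 1  > /sys/kernel/debug/dma_perf/dmaperf/run
 *   cat /sys/kernel/debug/dma_perf/dmaperf/status
 *   cat /sys/kernel/debug/dma_perf/dmaperf/thread_0/copied
 */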
MODULE_LICENSE(DRIVER_LICENSE);
MODULE_VERSION(DRIVER_VERSION);
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_AUTHOR("Changpeng Liu <changpeng.liu@intel.com>");
MODULE_DESCRIPTION(DRIVER_DESCRIPTION);
static struct dentry *perf_debugfs_dir;
static struct perf_ctx *g_perf;

static unsigned int seg_order = 12;	/* 4K */
static unsigned int queue_depth = 256;
static unsigned int run_order = 32;	/* 4G */
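
/*
 * The three knobs above are exposed through debugfs below: each DMA
 * transfer copies 1 << seg_order bytes (capped at MAX_TEST_SIZE), a
 * run moves 1 << run_order bytes per thread in total, and queue_depth
 * bounds how many descriptors a thread keeps in flight at once.
 */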
struct perf_mw {
	size_t buf_size;
	void *virt_addr;
};

struct pthr_ctx {
	struct dentry *debugfs_thr_dir;
	struct dentry *debugfs_copied;
	struct dentry *debugfs_elapsed_time;
	u64 copied;
	u64 elapsed_time;
	wait_queue_head_t wq;
	atomic_t dma_sync;
	struct task_struct *thread;
	struct perf_ctx *perf;
	struct perf_mw mw;
	struct dma_chan *dma_chan;
	struct device *dev;
	int node;
	int dma_up;
	int dma_down;
	int dma_prep_err;
};

struct perf_ctx {
	spinlock_t db_lock;
	struct dentry *debugfs_node_dir;
	struct dentry *debugfs_run;
	struct dentry *debugfs_threads;
	struct dentry *debugfs_queue_depth;
	struct dentry *debugfs_transfer_size_order;
	struct dentry *debugfs_total_size_order;
	struct dentry *debugfs_status;
	bool run;
	u8 perf_threads;
	int numa_nodes;
	atomic_t tsync;
	struct pthr_ctx pthr_ctx[MAX_THREADS];
};
static void perf_free_mw(struct pthr_ctx *pctx);
static int perf_set_mw(struct pthr_ctx *pctx, size_t size);
static void perf_copy_callback(void *data)
{
	struct pthr_ctx *pctx = data;

	atomic_dec(&pctx->dma_sync);
	wake_up(&pctx->wq);
}
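
/*
 * Completion accounting: perf_copy() bumps pctx->dma_sync for every
 * descriptor it submits, and the callback above drops it and wakes the
 * submitting thread, which sleeps in perf_move_data() whenever
 * queue_depth descriptors are already outstanding.
 */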
static ssize_t perf_copy(struct pthr_ctx *pctx, char *dst,
			 char *src, size_t size)
{
	struct dma_async_tx_descriptor *txd;
	struct dma_chan *chan = pctx->dma_chan;
	struct dma_device *device;
	struct dmaengine_unmap_data *unmap;
	dma_cookie_t cookie;
	size_t src_off, dst_off;
	int retries = 0;

	if (!chan) {
		pr_err("DMA engine does not exist\n");
		return -EINVAL;
	}

	device = chan->device;
	src_off = (size_t)src & ~PAGE_MASK;
	dst_off = (size_t)dst & ~PAGE_MASK;

	if (!is_dma_copy_aligned(device, src_off, dst_off, size))
		return -ENODEV;

	unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOWAIT);
	if (!unmap)
		return -ENOMEM;

	unmap->len = size;
	unmap->addr[0] = dma_map_page(device->dev, virt_to_page(src),
				      src_off, size, DMA_TO_DEVICE);
	if (dma_mapping_error(device->dev, unmap->addr[0]))
		goto err_get_unmap;

	unmap->to_cnt = 1;

	unmap->addr[1] = dma_map_page(device->dev, virt_to_page(dst),
				      dst_off, size, DMA_FROM_DEVICE);
	if (dma_mapping_error(device->dev, unmap->addr[1]))
		goto err_get_unmap;

	unmap->from_cnt = 1;

	do {
		txd = device->device_prep_dma_memcpy(chan, unmap->addr[1],
						     unmap->addr[0], size,
						     DMA_PREP_INTERRUPT);
		if (!txd) {
			if (retries++ > 20) {
				pctx->dma_prep_err++;
				goto err_get_unmap;
			}
			/* engine is out of descriptors: back off briefly */
			set_current_state(TASK_INTERRUPTIBLE);
			schedule_timeout(50);
		}
	} while (!txd);

	txd->callback = perf_copy_callback;
	txd->callback_param = pctx;
	dma_set_unmap(txd, unmap);

	cookie = dmaengine_submit(txd);
	if (dma_submit_error(cookie))
		goto err_set_unmap;

	atomic_inc(&pctx->dma_sync);

	dma_async_issue_pending(chan);

	return size;

err_set_unmap:
	dmaengine_unmap_put(unmap);
err_get_unmap:
	dmaengine_unmap_put(unmap);
	return 0;
}
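
/*
 * perf_copy() follows the standard dmaengine client sequence: map both
 * pages, prepare a memcpy descriptor, attach the callback and unmap
 * data, submit, and finally kick the channel with
 * dma_async_issue_pending(). dma_set_unmap() takes its own reference
 * on the unmap data, which is why the submit-error path falls through
 * both dmaengine_unmap_put() calls while earlier failures drop only
 * the original reference.
 */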
static int perf_move_data(struct pthr_ctx *pctx, char *dst, char *src,
			  u64 buf_size, u64 win_size, u64 total)
{
	int chunks, total_chunks, i;
	int copied_chunks = 0;
	ssize_t result;
	char *tmp = dst;
	u64 perf, diff_us;
	ktime_t kstart, kstop, kdiff;

	chunks = win_size / buf_size;
	total_chunks = total / buf_size;

	pr_info("%s: chunks: %d total_chunks: %d\n", current->comm,
		chunks, total_chunks);

	kstart = ktime_get();

	for (i = 0; i < total_chunks; i++) {
		/* throttle: at most queue_depth descriptors in flight */
		wait_event_interruptible(pctx->wq,
			atomic_read(&pctx->dma_sync) < queue_depth);

		result = perf_copy(pctx, tmp, src, buf_size);
		pctx->copied += result;

		copied_chunks++;
		if (copied_chunks == chunks) {
			/* destination window exhausted: wrap around */
			tmp = dst;
			copied_chunks = 0;
		} else
			tmp += buf_size;
	}

	pr_info("%s: All DMA descriptors submitted\n", current->comm);

	/* FIXME: need a timeout here eventually */
	while (atomic_read(&pctx->dma_sync) != 0)
		msleep(1);

	pr_info("%s: dma_up: %d dma_down: %d dma_prep_err: %d\n",
		current->comm, pctx->dma_up, pctx->dma_down,
		pctx->dma_prep_err);

	kstop = ktime_get();
	kdiff = ktime_sub(kstop, kstart);
	diff_us = ktime_to_us(kdiff);

	pr_info("%s: copied %llu bytes\n", current->comm, pctx->copied);

	pr_info("%s: lasted %llu usecs\n", current->comm, diff_us);

	perf = pctx->copied / diff_us;

	pr_info("%s: MBytes/s: %llu\n", current->comm, perf);

	pctx->elapsed_time = diff_us;

	return 0;
}
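
/*
 * Worked example with the defaults: buf_size = 1 << 12 = 4 KiB and
 * total = 1 << 32 = 4 GiB give total_chunks = 2^20 submissions per
 * thread. Since copied is in bytes and the elapsed time in
 * microseconds, copied / diff_us is bytes per microsecond, i.e. MB/s
 * (10^6 bytes per second), which is what the "MBytes/s" line reports.
 */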
static bool perf_dma_filter_fn(struct dma_chan *chan, void *node)
{
	return dev_to_node(&chan->dev->device) == (int)(unsigned long)node;
}
static int dma_perf_thread(void *data)
{
	struct pthr_ctx *pctx = data;
	struct perf_ctx *perf = pctx->perf;
	struct perf_mw *mw = &pctx->mw;
	char *dst, *src = NULL;
	u64 win_size, buf_size, total;
	int rc = 0, node;
	struct dma_chan *dma_chan = NULL;

	pr_info("kthread %s starting...\n", current->comm);

	node = pctx->node;

	if (!pctx->dma_chan) {
		dma_cap_mask_t dma_mask;

		dma_cap_zero(dma_mask);
		dma_cap_set(DMA_MEMCPY, dma_mask);
		dma_chan = dma_request_channel(dma_mask, perf_dma_filter_fn,
					       (void *)(unsigned long)node);
		if (!dma_chan) {
			pr_warn("%s: cannot acquire DMA channel, quitting\n",
				current->comm);
			return -ENODEV;
		}
		pctx->dma_chan = dma_chan;
		pctx->dev = dma_chan->device->dev;
	}

	src = kmalloc_node(MAX_TEST_SIZE, GFP_KERNEL, node);
	if (!src) {
		rc = -ENOMEM;
		goto err;
	}

	rc = perf_set_mw(pctx, MAX_TEST_SIZE);
	if (rc < 0) {
		pr_err("%s: set mw failed\n", current->comm);
		rc = -ENXIO;
		goto err;
	}

	win_size = mw->buf_size;
	buf_size = 1ULL << seg_order;
	total = 1ULL << run_order;

	if (buf_size > MAX_TEST_SIZE)
		buf_size = MAX_TEST_SIZE;

	dst = (char *)mw->virt_addr;

	/* wait until every sibling thread is ready before timing starts */
	atomic_inc(&perf->tsync);
	while (atomic_read(&perf->tsync) != perf->perf_threads)
		schedule();

	rc = perf_move_data(pctx, dst, src, buf_size, win_size, total);

	atomic_dec(&perf->tsync);

	if (rc < 0)
		pr_err("%s: failed\n", current->comm);

err:
	kfree(src);
	perf_free_mw(pctx);

	if (dma_chan) {
		dma_release_channel(dma_chan);
		pctx->dma_chan = NULL;
	}

	return rc;
}
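
/*
 * Thread-per-channel model: each dma_perf_thread() grabs a DMA_MEMCPY
 * channel on its own NUMA node (via perf_dma_filter_fn above),
 * allocates both buffers node-locally with kmalloc_node(), and
 * rendezvouses on perf->tsync so all threads begin moving data at
 * roughly the same time.
 */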
static void perf_free_mw(struct pthr_ctx *pctx)
{
	struct perf_mw *mw = &pctx->mw;

	if (!mw->virt_addr)
		return;

	kfree(mw->virt_addr);
	mw->buf_size = 0;
	mw->virt_addr = NULL;
}
static int perf_set_mw(struct pthr_ctx *pctx, size_t size)
{
	struct perf_mw *mw = &pctx->mw;

	if (!size)
		return -EINVAL;

	mw->buf_size = size;

	mw->virt_addr = kmalloc_node(size, GFP_KERNEL, pctx->node);

	if (!mw->virt_addr) {
		mw->buf_size = 0;
		return -ENOMEM;
	}

	return 0;
}
static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf,
				size_t count, loff_t *offp)
{
	struct perf_ctx *perf = filp->private_data;
	char *buf;
	ssize_t ret, out_offset;

	buf = kmalloc(64, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;
	out_offset = snprintf(buf, 64, "%d\n", perf->run);
	ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset);
	kfree(buf);

	return ret;
}
static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf,
				 size_t count, loff_t *offp)
{
	struct perf_ctx *perf = filp->private_data;
	int node, i;

	if (perf->perf_threads == 0)
		return 0;

	if (atomic_read(&perf->tsync) == 0)
		perf->run = false;

	if (perf->run == true) {
		/* lets stop the threads */
		perf->run = false;
		for (i = 0; i < MAX_THREADS; i++) {
			if (perf->pthr_ctx[i].thread) {
				kthread_stop(perf->pthr_ctx[i].thread);
				perf->pthr_ctx[i].thread = NULL;
			} else
				break;
		}
	} else {
		perf->run = true;

		if (perf->perf_threads > MAX_THREADS) {
			perf->perf_threads = MAX_THREADS;
			pr_info("Reset total threads to: %u\n", MAX_THREADS);
		}

		/* no greater than 1M */
		if (seg_order > 20) {
			seg_order = 20;
			pr_info("Fix seg_order to %u\n", seg_order);
		}

		if (run_order < seg_order) {
			run_order = seg_order;
			pr_info("Fix run_order to %u\n", run_order);
		}

		/* launch kernel thread */
		for (i = 0; i < perf->perf_threads; i++) {
			struct pthr_ctx *pctx;

			pctx = &perf->pthr_ctx[i];
			atomic_set(&pctx->dma_sync, 0);
			pctx->perf = perf;
			pctx->copied = 0;
			pctx->elapsed_time = 0;
			init_waitqueue_head(&pctx->wq);

			/* NUMA socket node */
			pctx->node = i / DMA_CHANNELS_PER_NODE;
			node = pctx->node;

			pctx->thread =
				kthread_create_on_node(dma_perf_thread, pctx,
						       node, "dma_perf %d", i);
			if (IS_ERR(pctx->thread)) {
				pctx->thread = NULL;
				perf->run = false;
				break;
			}
			wake_up_process(pctx->thread);
		}

		if (perf->run == false) {
			/* thread creation failed: stop any already running */
			for (i = 0; i < MAX_THREADS; i++) {
				struct pthr_ctx *pctx = &perf->pthr_ctx[i];

				if (pctx->thread) {
					kthread_stop(pctx->thread);
					pctx->thread = NULL;
				}
			}
			return -ENXIO;
		}
	}

	return count;
}
static const struct file_operations dma_perf_debugfs_run = {
	.owner = THIS_MODULE,
	.open = simple_open,
	.read = debugfs_run_read,
	.write = debugfs_run_write,
};
static ssize_t debugfs_status_read(struct file *filp, char __user *ubuf,
				   size_t count, loff_t *offp)
{
	struct perf_ctx *perf = filp->private_data;
	char *buf;
	ssize_t ret, out_offset;

	buf = kmalloc(64, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;
	out_offset = snprintf(buf, 64, "%s\n",
			      atomic_read(&perf->tsync) ? "running" : "idle");
	ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset);
	kfree(buf);

	return ret;
}
static const struct file_operations dma_perf_debugfs_status = {
	.owner = THIS_MODULE,
	.open = simple_open,
	.read = debugfs_status_read,
};
static int perf_debugfs_setup(struct perf_ctx *perf)
{
	char temp_name[64];
	int i;

	if (!perf_debugfs_dir)
		return -ENODEV;

	perf->debugfs_node_dir = debugfs_create_dir("dmaperf",
						    perf_debugfs_dir);
	if (!perf->debugfs_node_dir)
		return -ENODEV;

	perf->debugfs_run = debugfs_create_file("run", S_IRUSR | S_IWUSR,
						perf->debugfs_node_dir, perf,
						&dma_perf_debugfs_run);
	if (!perf->debugfs_run)
		return -ENODEV;

	perf->debugfs_status = debugfs_create_file("status", S_IRUSR,
						   perf->debugfs_node_dir, perf,
						   &dma_perf_debugfs_status);
	if (!perf->debugfs_status)
		return -ENODEV;

	perf->debugfs_threads = debugfs_create_u8("threads", S_IRUSR | S_IWUSR,
						  perf->debugfs_node_dir,
						  &perf->perf_threads);
	if (!perf->debugfs_threads)
		return -ENODEV;

	perf->debugfs_queue_depth = debugfs_create_u32("queue_depth",
						       S_IRUSR | S_IWUSR,
						       perf->debugfs_node_dir,
						       &queue_depth);
	if (!perf->debugfs_queue_depth)
		return -ENODEV;

	perf->debugfs_transfer_size_order =
		debugfs_create_u32("transfer_size_order", S_IRUSR | S_IWUSR,
				   perf->debugfs_node_dir,
				   &seg_order);
	if (!perf->debugfs_transfer_size_order)
		return -ENODEV;

	perf->debugfs_total_size_order =
		debugfs_create_u32("total_size_order", S_IRUSR | S_IWUSR,
				   perf->debugfs_node_dir,
				   &run_order);
	if (!perf->debugfs_total_size_order)
		return -ENODEV;

	for (i = 0; i < MAX_THREADS; i++) {
		struct pthr_ctx *pctx = &perf->pthr_ctx[i];

		sprintf(temp_name, "thread_%d", i);
		pctx->debugfs_thr_dir = debugfs_create_dir(temp_name,
							   perf->debugfs_node_dir);
		if (!pctx->debugfs_thr_dir)
			return -ENODEV;

		pctx->debugfs_copied = debugfs_create_u64("copied", S_IRUSR,
							  pctx->debugfs_thr_dir,
							  &pctx->copied);
		if (!pctx->debugfs_copied)
			return -ENODEV;

		pctx->debugfs_elapsed_time =
			debugfs_create_u64("elapsed_time", S_IRUSR,
					   pctx->debugfs_thr_dir,
					   &pctx->elapsed_time);
		if (!pctx->debugfs_elapsed_time)
			return -ENODEV;
	}

	return 0;
}
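
/*
 * Resulting debugfs layout (top-level directory named after
 * KBUILD_MODNAME, one thread_<i> directory per possible thread):
 *
 *   dma_perf/
 *     dmaperf/
 *       run  status  threads  queue_depth
 *       transfer_size_order  total_size_order
 *       thread_0/  copied  elapsed_time
 *       ...
 *       thread_31/ copied  elapsed_time
 */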
static int perf_probe(void)
{
	struct perf_ctx *perf;
	int rc = 0;

	perf = kzalloc_node(sizeof(*perf), GFP_KERNEL, 0);
	if (!perf)
		return -ENOMEM;

	perf->numa_nodes = num_online_nodes();
	perf->perf_threads = 1;
	atomic_set(&perf->tsync, 0);
	perf->run = false;
	spin_lock_init(&perf->db_lock);

	if (debugfs_initialized() && !perf_debugfs_dir) {
		perf_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
		if (!perf_debugfs_dir) {
			rc = -ENODEV;
			goto err;
		}

		rc = perf_debugfs_setup(perf);
		if (rc)
			goto err;
	}

	g_perf = perf;

	return 0;

err:
	kfree(perf);
	return rc;
}
static void perf_remove(void)
{
	struct perf_ctx *perf = g_perf;
	int i;

	if (!perf)
		return;

	if (perf_debugfs_dir) {
		debugfs_remove_recursive(perf_debugfs_dir);
		perf_debugfs_dir = NULL;
	}

	for (i = 0; i < MAX_THREADS; i++) {
		struct pthr_ctx *pctx = &perf->pthr_ctx[i];

		if (pctx->dma_chan)
			dma_release_channel(pctx->dma_chan);
	}

	kfree(perf);
	g_perf = NULL;
}
static int __init perf_init_module(void)
{
	pr_info("DMA Performance Test Init\n");
	return perf_probe();
}
module_init(perf_init_module);

static void __exit perf_exit_module(void)
{
	pr_info("DMA Performance Test Exit\n");
	perf_remove();
}
module_exit(perf_exit_module);