2 * This file is provided under a dual BSD/GPLv2 license. When using or
3 * redistributing this file, you may do so under either license.
7 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of version 2 of the GNU General Public License as
11 * published by the Free Software Foundation.
13 * This program is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
21 * The full GNU General Public License is included in this distribution
22 * in the file called LICENSE.GPL.
24 * Contact Information:
29 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
30 * All rights reserved.
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
36 * * Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * * Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in
40 * the documentation and/or other materials provided with the
42 * * Neither the name of Intel Corporation nor the names of its
43 * contributors may be used to endorse or promote products derived
44 * from this software without specific prior written permission.
46 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
47 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
48 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
49 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
50 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
51 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
52 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
53 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
54 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
55 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
56 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
60 #include <linux/module.h>
61 #include <linux/miscdevice.h>
63 #include <linux/device.h>
64 #include <linux/errno.h>
65 #include <linux/vmalloc.h>
67 #include <linux/version.h>
71 #include <xen/xen-ops.h>
72 #include <xen/interface/memory.h>
74 #include <exec-env/rte_dom0_common.h>
77 #include "dom0_mm_dev.h"
/* Module metadata reported through modinfo. */
79 MODULE_LICENSE("Dual BSD/GPL");
80 MODULE_AUTHOR("Intel Corporation");
81 MODULE_DESCRIPTION("Kernel Module for supporting DPDK running on Xen Dom0");
/*
 * Driver-wide state. Members referenced in this file: miscdev, data_lock,
 * config_memsize, used_memsize, mm_data[], num_mem_ctx, num_bigblock and
 * fail_times.
 */
83 static struct dom0_mm_dev dom0_dev
;
/* kobject created at init under mm_kobj; it hosts the sysfs attribute group. */
84 static struct kobject
*dom0_kobj
= NULL
;
/*
 * Per-block bookkeeping array, vmalloc'ed in dom0_memory_reserve() with one
 * entry per SIZE_PER_BLOCK unit of the reserved size.
 */
86 static struct memblock_info
*rsv_mm_info
;
88 /* Default configuration for reserved memory size(2048 MB). */
/* Exposed as the "rsv_memsize" module parameter at the end of this file. */
89 static uint32_t rsv_memsize
= 2048;
/*
 * Forward declarations for the file_operations callbacks and for the memory
 * helpers that are called before their definitions.
 * Note that dom0_mmap() and dom0_memory_free() name their last parameter
 * differently in their definitions (vm and rsv_size respectively).
 */
91 static int dom0_open(struct inode
*inode
, struct file
*file
);
92 static int dom0_release(struct inode
*inode
, struct file
*file
);
93 static int dom0_ioctl(struct file
*file
, unsigned int ioctl_num
,
94 unsigned long ioctl_param
);
95 static int dom0_mmap(struct file
*file
, struct vm_area_struct
*vma
);
96 static int dom0_memory_free(uint32_t size
);
97 static int dom0_memory_release(struct dom0_mm_data
*mm_data
);
/*
 * File operations installed on the misc device (dom0_dev.miscdev.fops).
 * The entries visible here are .owner, .release and .unlocked_ioctl; any
 * .open/.mmap entries and the closing brace are on lines not shown here.
 *
 * NOTE(review): dom0_ioctl() is declared as returning int and is installed
 * through a (void *) cast, which hides the mismatch with the handler type
 * expected by .unlocked_ioctl. Consider giving the handler the expected
 * return type and dropping the cast.
 */
99 static const struct file_operations data_fops
= {
100 .owner
= THIS_MODULE
,
102 .release
= dom0_release
,
104 .unlocked_ioctl
= (void *)dom0_ioctl
,
108 show_memsize_rsvd(struct device
*dev
, struct device_attribute
*attr
, char *buf
)
110 return snprintf(buf
, 10, "%u\n", dom0_dev
.used_memsize
);
114 show_memsize(struct device
*dev
, struct device_attribute
*attr
, char *buf
)
116 return snprintf(buf
, 10, "%u\n", dom0_dev
.config_memsize
);
/*
 * sysfs store handler for the writable "memsize" attribute.
 *
 * Parses buf with kstrtoul() (base 0, i.e. prefix-detected) and, while
 * holding dom0_dev.data_lock, records the value in dom0_dev.config_memsize
 * unless it exceeds the part of the reservation that is not yet in use
 * (rsv_memsize - used_memsize). Returns err when it is non-zero, otherwise
 * count.
 *
 * The declaration of err, the branch preceding the "else if" and the
 * statements that set err are on lines not shown here.
 *
 * NOTE(review): rsv_memsize is unsigned and can be rewritten at runtime via
 * the module parameter; if it ever drops below used_memsize the subtraction
 * wraps and the upper-bound check stops rejecting anything - confirm.
 */
120 store_memsize(struct device
*dev
, struct device_attribute
*attr
,
121 const char *buf
, size_t count
)
124 unsigned long mem_size
;
/* Reject input that is not a valid unsigned number. */
126 if (0 != kstrtoul(buf
, 0, &mem_size
))
129 mutex_lock(&dom0_dev
.data_lock
);
/* The request must fit in the memory that is still unassigned. */
133 } else if (mem_size
> (rsv_memsize
- dom0_dev
.used_memsize
)) {
134 XEN_ERR("configure memory size fail\n");
138 dom0_dev
.config_memsize
= mem_size
;
141 mutex_unlock(&dom0_dev
.data_lock
);
/* A sysfs store reports success by returning the number of bytes consumed. */
142 return err
? err
: count
;
/*
 * sysfs attributes of the driver:
 *   memsize      - world-readable, owner-writable; show_memsize/store_memsize
 *   memsize_rsvd - world-readable only; show_memsize_rsvd
 */
145 static DEVICE_ATTR(memsize
, S_IRUGO
| S_IWUSR
, show_memsize
, store_memsize
);
146 static DEVICE_ATTR(memsize_rsvd
, S_IRUGO
, show_memsize_rsvd
, NULL
);
/*
 * Attribute list for the group below. The list terminator is presumably on
 * a line not shown here - confirm it is NULL-terminated.
 */
148 static struct attribute
*dev_attrs
[] = {
149 &dev_attr_memsize
.attr
,
150 &dev_attr_memsize_rsvd
.attr
,
154 /* the memory size unit is MB */
/* Group published in the "memsize-mB" sub-directory of the dom0-mm kobject. */
155 static const struct attribute_group dev_attr_grp
= {
156 .name
= "memsize-mB",
/*
 * Order the first cnt entries of mb by ascending pfn.
 *
 * Each outer pass walks from the tail towards position i and swaps adjacent
 * entries that are out of order, exchanging the pfn and vir_addr members.
 * No other member of struct memblock_info is visibly exchanged here; the
 * only caller in this file runs the sort before mfn, exchange_flag and used
 * are filled in.
 *
 * @mb:  array of block descriptors, modified in place
 * @cnt: number of entries to sort
 */
162 sort_viraddr(struct memblock_info
*mb
, int cnt
)
166 uint64_t tmp_viraddr
;
168 /*sort virtual address and pfn */
169 for(i
= 0; i
< cnt
; i
++) {
170 for(j
= cnt
- 1; j
> i
; j
--) {
171 if(mb
[j
].pfn
< mb
[j
- 1].pfn
) {
/* Swap the pfn members (the final assignment is on a line not shown here). */
172 tmp_pfn
= mb
[j
- 1].pfn
;
173 mb
[j
- 1].pfn
= mb
[j
].pfn
;
/* Keep the virtual address paired with its pfn. */
176 tmp_viraddr
= mb
[j
- 1].vir_addr
;
177 mb
[j
- 1].vir_addr
= mb
[j
].vir_addr
;
178 mb
[j
].vir_addr
= tmp_viraddr
;
/*
 * Look up a registered memory context by name.
 *
 * Scans dom0_dev.mm_data[0 .. NUM_MEM_CTX) for a non-NULL entry whose name
 * matches mem_name within the first DOM0_NAME_MAX bytes. Callers store the
 * result in an int and use it as an index into dom0_dev.mm_data; the exact
 * hit/miss return values are on lines not shown here (presumably the slot
 * index, or a negative value when nothing matches - confirm).
 *
 * Callers in this file hold dom0_dev.data_lock around the lookup.
 */
185 dom0_find_memdata(const char * mem_name
)
189 for(i
= 0; i
< NUM_MEM_CTX
; i
++) {
/* Empty slots cannot match. */
190 if(dom0_dev
.mm_data
[i
] == NULL
)
/* Bounded compare: neither name needs to be NUL-terminated within the limit. */
192 if (!strncmp(dom0_dev
.mm_data
[i
]->name
, mem_name
,
193 sizeof(char) * DOM0_NAME_MAX
)) {
/*
 * Find an unused slot in dom0_dev.mm_data.
 *
 * Scans slots 0 .. NUM_MEM_CTX - 1 for the first NULL entry. The return
 * statements are on lines not shown here (presumably the slot index, or a
 * negative value when the table is full - confirm).
 */
203 dom0_find_mempos(void)
208 for(i
= 0; i
< NUM_MEM_CTX
; i
++) {
209 if(dom0_dev
.mm_data
[i
] == NULL
){
/*
 * Return the blocks owned by a memory context to the reserved pool and
 * remove the context from the driver's table.
 *
 * Visible steps: derive the block count from mm_data->mem_size, look the
 * context up by name, subtract its size from dom0_dev.used_memsize, clear
 * its table slot and decrement num_mem_ctx, mark every block listed in
 * mm_data->block_num[] as unused in rsv_mm_info, then zero *mm_data.
 *
 * The guards around these steps, any freeing of mm_data and the return
 * statements are on lines not shown here. The visible caller
 * (dom0_release) holds dom0_dev.data_lock.
 */
219 dom0_memory_release(struct dom0_mm_data
*mm_data
)
222 uint32_t num_block
, block_id
;
224 /* each memory block is 2M */
225 num_block
= mm_data
->mem_size
/ SIZE_PER_BLOCK
;
229 /* reset global memory data */
230 idx
= dom0_find_memdata(mm_data
->name
);
/*
 * NOTE(review): idx indexes mm_data[] below; confirm it is validated on the
 * line that is not shown here before being used.
 */
232 dom0_dev
.used_memsize
-= mm_data
->mem_size
;
233 dom0_dev
.mm_data
[idx
] = NULL
;
234 dom0_dev
.num_mem_ctx
--;
237 /* reset these memory blocks status as free */
/* idx is reused here as a plain loop counter over the context's blocks. */
238 for (idx
= 0; idx
< num_block
; idx
++) {
239 block_id
= mm_data
->block_num
[idx
];
240 rsv_mm_info
[block_id
].used
= 0;
/* Scrub the context so stale block/segment data cannot be reused. */
243 memset(mm_data
, 0, sizeof(struct dom0_mm_data
));
/*
 * Give the reserved pool back to the kernel.
 *
 * @rsv_size: size of the region to release, in the same unit as
 *            SIZE_PER_BLOCK (the number of entries walked is
 *            rsv_size / SIZE_PER_BLOCK).
 *
 * Does nothing useful unless running as a Xen PV domain. The first loop
 * handles the entries allocated as double-size ("4M") chunks: entries are
 * taken in pairs (i, i + 1), each half has its contiguous region destroyed
 * if its exchange_flag is set, the pages have their Reserved bit cleared and
 * the chunk is freed with order MAX_NUM_ORDER. The second loop does the same
 * for the remaining single ("2M") blocks with order DOM0_CONTIG_NUM_ORDER.
 * Finally the bookkeeping array is zeroed.
 *
 * The page-walk loops, the early-exit return values and anything done with
 * rsv_mm_info after the memset are on lines not shown here.
 *
 * NOTE(review): this relies on the first num_bigblock * 2 entries of
 * rsv_mm_info being the 4M pairs, but dom0_memory_reserve() sorts the whole
 * array by pfn after allocating. If both 4M and 2M allocations happened the
 * sort may interleave them - confirm.
 */
249 dom0_memory_free(uint32_t rsv_size
)
251 uint64_t vstart
, vaddr
;
252 uint32_t i
, num_block
, size
;
254 if (!xen_pv_domain())
257 /* each memory block is 2M */
258 num_block
= rsv_size
/ SIZE_PER_BLOCK
;
262 /* free all memory blocks of size of 4M and destroy contiguous region */
263 for (i
= 0; i
< dom0_dev
.num_bigblock
* 2; i
+= 2) {
264 vstart
= rsv_mm_info
[i
].vir_addr
;
/* Before 3.13 the Xen helper is given a virtual address ... */
266 #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0)
267 if (rsv_mm_info
[i
].exchange_flag
)
268 xen_destroy_contiguous_region(vstart
,
269 DOM0_CONTIG_NUM_ORDER
);
270 if (rsv_mm_info
[i
+ 1].exchange_flag
)
271 xen_destroy_contiguous_region(vstart
+
273 DOM0_CONTIG_NUM_ORDER
);
/* ... from 3.13 on it is given an address derived from the pfn. */
275 if (rsv_mm_info
[i
].exchange_flag
)
276 xen_destroy_contiguous_region(rsv_mm_info
[i
].pfn
278 DOM0_CONTIG_NUM_ORDER
);
279 if (rsv_mm_info
[i
+ 1].exchange_flag
)
280 xen_destroy_contiguous_region(rsv_mm_info
[i
].pfn
281 * PAGE_SIZE
+ DOM0_MEMBLOCK_SIZE
,
282 DOM0_CONTIG_NUM_ORDER
);
/* Undo SetPageReserved() for every page of the double-size chunk. */
285 size
= DOM0_MEMBLOCK_SIZE
* 2;
288 ClearPageReserved(virt_to_page(vaddr
));
292 free_pages(vstart
, MAX_NUM_ORDER
);
296 /* free all memory blocks size of 2M and destroy contiguous region */
/* i continues from where the 4M loop stopped. */
297 for (; i
< num_block
; i
++) {
298 vstart
= rsv_mm_info
[i
].vir_addr
;
/*
 * NOTE(review): only the virtual-address form of the call is visible in this
 * loop, without the kernel-version switch used in the 4M loop - confirm
 * this is correct on kernels >= 3.13.
 */
300 if (rsv_mm_info
[i
].exchange_flag
)
301 xen_destroy_contiguous_region(vstart
,
302 DOM0_CONTIG_NUM_ORDER
);
304 size
= DOM0_MEMBLOCK_SIZE
;
307 ClearPageReserved(virt_to_page(vaddr
));
311 free_pages(vstart
, DOM0_CONTIG_NUM_ORDER
);
315 memset(rsv_mm_info
, 0, sizeof(struct memblock_info
) * num_block
);
/*
 * Claim up to count unused blocks from the reserved pool for mm_data.
 *
 * Walks rsv_mm_info with index j; for every entry whose used flag is 0 it
 * copies pfn, vir_addr, mfn and exchange_flag into mm_data->block_info[i],
 * remembers the pool index in mm_data->block_num[i] and marks the pool
 * entry used. The loop ends when count blocks were claimed or the pool is
 * exhausted. How i and j are initialised/advanced and what is returned are
 * on lines not shown here.
 *
 * NOTE(review): the pool bound is recomputed from the current rsv_memsize,
 * not from the length rsv_mm_info was allocated with. rsv_memsize is a
 * writable module parameter, so a larger value written after load would let
 * j run past the array - confirm.
 */
323 find_free_memory(uint32_t count
, struct dom0_mm_data
*mm_data
)
328 while ((i
< count
) && (j
< rsv_memsize
/ SIZE_PER_BLOCK
)) {
329 if (rsv_mm_info
[j
].used
== 0) {
330 mm_data
->block_info
[i
].pfn
= rsv_mm_info
[j
].pfn
;
331 mm_data
->block_info
[i
].vir_addr
=
332 rsv_mm_info
[j
].vir_addr
;
333 mm_data
->block_info
[i
].mfn
= rsv_mm_info
[j
].mfn
;
334 mm_data
->block_info
[i
].exchange_flag
=
335 rsv_mm_info
[j
].exchange_flag
;
/* Remember the pool slot so dom0_memory_release() can hand it back. */
336 mm_data
->block_num
[i
] = j
;
337 rsv_mm_info
[j
].used
= 1;
/*
 * find_memseg - group a context's blocks into memory segments.
 *
 * Reads mm_data->block_info[0 .. count) and fills mm_data->seg_info[]: a
 * segment starts at a block whose exchange_flag is set and is extended while
 * the following block's pfn equals the previous block's pfn plus
 * DOM0_MEMBLOCK_SIZE / PAGE_SIZE. For each segment the start pfn, the mfn of
 * every member block and the total size (member count * DOM0_MEMBLOCK_SIZE)
 * are stored. Grouping stops once DOM0_NUM_MEMSEG segments exist; the
 * segment count is written to mm_data->num_memseg.
 *
 * The outer loop header, the handling of i, k, idx and num_block and the
 * return statement are on lines not shown here.
 */
345 * Find all memory segments in which physical addresses are contiguous.
348 find_memseg(int count
, struct dom0_mm_data
* mm_data
)
352 uint64_t zone_len
, pfn
, num_block
;
/* Blocks whose MFN exchange failed never start a segment. */
355 if (mm_data
->block_info
[i
].exchange_flag
== 0) {
/* Open a new segment at block i. */
360 pfn
= mm_data
->block_info
[i
].pfn
;
361 mm_data
->seg_info
[idx
].pfn
= pfn
;
362 mm_data
->seg_info
[idx
].mfn
[k
] = mm_data
->block_info
[i
].mfn
;
/* Extend the segment over the following blocks. */
364 for (j
= i
+ 1; j
< count
; j
++) {
366 /* ignore exchange fail memory block */
367 if (mm_data
->block_info
[j
].exchange_flag
== 0)
/* Stop extending at the first gap in the pfn sequence. */
370 if (mm_data
->block_info
[j
].pfn
!=
371 (mm_data
->block_info
[j
- 1].pfn
+
372 DOM0_MEMBLOCK_SIZE
/ PAGE_SIZE
))
375 mm_data
->seg_info
[idx
].mfn
[k
] = mm_data
->block_info
[j
].mfn
;
379 zone_len
= num_block
* DOM0_MEMBLOCK_SIZE
;
380 mm_data
->seg_info
[idx
].size
= zone_len
;
382 XEN_PRINT("memseg id=%d, size=0x%llx\n", idx
, zone_len
);
/* seg_info[] holds at most DOM0_NUM_MEMSEG entries. */
385 if (idx
== DOM0_NUM_MEMSEG
)
388 mm_data
->num_memseg
= idx
;
/*
 * Reserve rsv_size worth of memory at module load and prepare it for use.
 *
 * Visible steps:
 *  1. Allocate and zero rsv_mm_info with one entry per SIZE_PER_BLOCK unit.
 *  2. Try to obtain the memory as double-size ("4M") chunks of order
 *     MAX_NUM_ORDER; each chunk fills two consecutive entries and bumps
 *     dom0_dev.num_bigblock.
 *  3. Obtain whatever is left as single ("2M") blocks of order
 *     DOM0_CONTIG_NUM_ORDER; a failure here frees what was allocated so far.
 *  4. Mark every page Reserved and record pfn / virtual address per entry.
 *  5. Sort the entries by pfn.
 *  6. Ask Xen to make each block machine-contiguous; record success in
 *     exchange_flag and give up (freeing everything) once more than
 *     MAX_EXCHANGE_FAIL_TIME exchanges have failed.
 *
 * The conditions testing allocation results, the page-walk loops and all
 * return statements are on lines not shown here. The visible caller treats a
 * negative return as failure.
 */
392 dom0_memory_reserve(uint32_t rsv_size
)
394 uint64_t pfn
, vstart
, vaddr
;
395 uint32_t i
, num_block
, size
, allocated_size
= 0;
/* From 3.13 on, xen_create_contiguous_region() also returns a DMA handle. */
397 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
398 dma_addr_t dma_handle
;
401 /* 2M as memory block */
402 num_block
= rsv_size
/ SIZE_PER_BLOCK
;
404 rsv_mm_info
= vmalloc(sizeof(struct memblock_info
) * num_block
);
406 XEN_ERR("Unable to allocate device memory information\n");
409 memset(rsv_mm_info
, 0, sizeof(struct memblock_info
) * num_block
);
411 /* try alloc size of 4M once */
/*
 * NOTE(review): each pass fills entries i and i + 1. With an odd num_block
 * the last pass would write entry num_block, one past the array - confirm a
 * guard exists on the lines that are not shown here.
 */
412 for (i
= 0; i
< num_block
; i
+= 2) {
413 vstart
= (unsigned long)
414 __get_free_pages(GFP_ATOMIC
, MAX_NUM_ORDER
);
/* Track how much has been obtained so a later failure can free it. */
418 dom0_dev
.num_bigblock
= i
/ 2 + 1;
419 allocated_size
= SIZE_PER_BLOCK
* (i
+ 2);
422 size
= DOM0_MEMBLOCK_SIZE
* 2;
426 SetPageReserved(virt_to_page(vaddr
));
/* One 4M chunk is recorded as two adjacent 2M entries. */
431 pfn
= virt_to_pfn(vstart
);
432 rsv_mm_info
[i
].pfn
= pfn
;
433 rsv_mm_info
[i
].vir_addr
= vstart
;
434 rsv_mm_info
[i
+ 1].pfn
=
435 pfn
+ DOM0_MEMBLOCK_SIZE
/ PAGE_SIZE
;
436 rsv_mm_info
[i
+ 1].vir_addr
=
437 vstart
+ DOM0_MEMBLOCK_SIZE
;
440 /*if it failed to alloc 4M, and continue to alloc 2M once */
441 for (; i
< num_block
; i
++) {
442 vstart
= (unsigned long)
443 __get_free_pages(GFP_ATOMIC
, DOM0_CONTIG_NUM_ORDER
);
445 XEN_ERR("allocate memory fail.\n");
446 dom0_memory_free(allocated_size
);
450 allocated_size
+= SIZE_PER_BLOCK
;
452 size
= DOM0_MEMBLOCK_SIZE
;
455 SetPageReserved(virt_to_page(vaddr
));
459 pfn
= virt_to_pfn(vstart
);
460 rsv_mm_info
[i
].pfn
= pfn
;
461 rsv_mm_info
[i
].vir_addr
= vstart
;
/*
 * NOTE(review): sorting by pfn may move 2M entries in between 4M pairs,
 * while dom0_memory_free() assumes the pairs occupy the first
 * num_bigblock * 2 entries - confirm.
 */
464 sort_viraddr(rsv_mm_info
, num_block
);
466 for (i
= 0; i
< num_block
; i
++) {
469 * This API is used to exchage MFN for getting a block of
470 * contiguous physical addresses, its maximum size is 2M.
472 #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0)
473 if (xen_create_contiguous_region(rsv_mm_info
[i
].vir_addr
,
474 DOM0_CONTIG_NUM_ORDER
, 0) == 0) {
476 if (xen_create_contiguous_region(rsv_mm_info
[i
].pfn
* PAGE_SIZE
,
477 DOM0_CONTIG_NUM_ORDER
, 0, &dma_handle
) == 0) {
/* Exchange succeeded: the block is usable and starts out free. */
479 rsv_mm_info
[i
].exchange_flag
= 1;
481 pfn_to_mfn(rsv_mm_info
[i
].pfn
);
482 rsv_mm_info
[i
].used
= 0;
/* Exchange failed: keep the block but flag it so it is never put in a segment. */
484 XEN_ERR("exchange memeory fail\n");
485 rsv_mm_info
[i
].exchange_flag
= 0;
486 dom0_dev
.fail_times
++;
487 if (dom0_dev
.fail_times
> MAX_EXCHANGE_FAIL_TIME
) {
488 dom0_memory_free(rsv_size
);
/*
 * Build a new memory context for a PREPARE_MEMSEG request.
 *
 * Copies the requested name into mm_data (forcing NUL termination), picks a
 * free slot in dom0_dev.mm_data, claims meminfo->size / SIZE_PER_BLOCK
 * blocks, groups them into segments, then publishes the context: the slot is
 * filled, num_mem_ctx is incremented and used_memsize grows by the context
 * size.
 *
 * The check on the slot index, any update of the reference count and the
 * return statements are on lines not shown here. The visible caller holds
 * dom0_dev.data_lock and treats a negative return as failure.
 *
 * NOTE(review): no check of the find_free_memory()/find_memseg() results is
 * visible; if fewer blocks than requested were free, mem_size would still
 * be recorded as the full request - confirm.
 */
498 dom0_prepare_memsegs(struct memory_info
*meminfo
, struct dom0_mm_data
*mm_data
)
503 /* check if there is a free name buffer */
504 memcpy(mm_data
->name
, meminfo
->name
, DOM0_NAME_MAX
);
/* meminfo comes from user space, so termination cannot be assumed. */
505 mm_data
->name
[DOM0_NAME_MAX
- 1] = '\0';
506 idx
= dom0_find_mempos();
510 num_block
= meminfo
->size
/ SIZE_PER_BLOCK
;
511 /* find free memory and new memory segments*/
512 find_free_memory(num_block
, mm_data
);
513 find_memseg(num_block
, mm_data
);
515 /* update private memory data */
517 mm_data
->mem_size
= meminfo
->size
;
519 /* update global memory data */
520 dom0_dev
.mm_data
[idx
] = mm_data
;
521 dom0_dev
.num_mem_ctx
++;
522 dom0_dev
.used_memsize
+= mm_data
->mem_size
;
/*
 * Validate a PREPARE_MEMSEG request before any memory is claimed.
 *
 * Visible checks: the context table must not be over-full, the requested
 * name must not already be registered, and used_memsize plus the (rounded)
 * request must not exceed rsv_memsize. An odd meminfo->size is adjusted in
 * place (the adjusting statement is on a line not shown here). The visible
 * caller holds dom0_dev.data_lock and treats a negative return as failure.
 */
528 dom0_check_memory (struct memory_info
*meminfo
)
533 /* round memory size to the next even number. */
534 if (meminfo
->size
% 2)
537 mem_size
= meminfo
->size
;
/*
 * NOTE(review): the table has NUM_MEM_CTX slots, so it is already full when
 * num_mem_ctx == NUM_MEM_CTX; ">" lets that case through and the request
 * only fails later, when no free slot is found. ">=" looks intended -
 * confirm.
 */
538 if (dom0_dev
.num_mem_ctx
> NUM_MEM_CTX
) {
539 XEN_ERR("Memory data space is full in Dom0 driver\n");
542 idx
= dom0_find_memdata(meminfo
->name
);
544 XEN_ERR("Memory data name %s has already exsited in Dom0 driver.\n",
548 if ((dom0_dev
.used_memsize
+ mem_size
) > rsv_memsize
) {
549 XEN_ERR("Total size can't be larger than reserved size.\n");
/*
 * Body of the module init routine (registered via module_init(dom0_init));
 * the function header and the return statements are on lines not shown here.
 *
 * Sequence: validate rsv_memsize against DOM0_CONFIG_MEMSIZE, register the
 * "dom0_mm" misc device, initialise the data lock, create the "dom0-mm"
 * kobject under mm_kobj, publish the sysfs attribute group, and reserve the
 * memory pool. Each failure undoes the steps already completed.
 *
 * NOTE(review): the misc device is registered before mutex_init() and
 * before the pool is reserved, so user space could open it and issue ioctls
 * against an uninitialised lock / NULL rsv_mm_info. Registering the device
 * last would close that window - confirm.
 */
562 if (rsv_memsize
> DOM0_CONFIG_MEMSIZE
) {
563 XEN_ERR("The reserved memory size cannot be greater than %d\n",
564 DOM0_CONFIG_MEMSIZE
);
568 /* Setup the misc device */
569 dom0_dev
.miscdev
.minor
= MISC_DYNAMIC_MINOR
;
570 dom0_dev
.miscdev
.name
= "dom0_mm";
571 dom0_dev
.miscdev
.fops
= &data_fops
;
573 /* register misc char device */
574 if (misc_register(&dom0_dev
.miscdev
) != 0) {
575 XEN_ERR("Misc device registration failed\n");
579 mutex_init(&dom0_dev
.data_lock
);
580 dom0_kobj
= kobject_create_and_add("dom0-mm", mm_kobj
);
/* kobject creation failed: only the misc device needs undoing. */
583 XEN_ERR("dom0-mm object creation failed\n");
584 misc_deregister(&dom0_dev
.miscdev
);
/* sysfs group creation failed: drop the kobject, then the misc device. */
588 if (sysfs_create_group(dom0_kobj
, &dev_attr_grp
)) {
589 kobject_put(dom0_kobj
);
590 misc_deregister(&dom0_dev
.miscdev
);
/* Pool reservation failed: unwind everything in reverse order. */
594 if (dom0_memory_reserve(rsv_memsize
) < 0) {
595 sysfs_remove_group(dom0_kobj
, &dev_attr_grp
);
596 kobject_put(dom0_kobj
);
597 misc_deregister(&dom0_dev
.miscdev
);
601 XEN_PRINT("####### DPDK Xen Dom0 module loaded #######\n");
/*
 * Body of the module exit routine (registered via module_exit(dom0_exit));
 * the function header is on lines not shown here.
 *
 * Frees the reserved pool if it exists, removes the sysfs group, drops the
 * kobject and unregisters the misc device.
 *
 * NOTE(review): the pool is freed while the misc device is still
 * registered, and with the current value of rsv_memsize, not the size used
 * at load time. Unregistering the device first looks safer - confirm.
 */
609 if (rsv_mm_info
!= NULL
)
610 dom0_memory_free(rsv_memsize
);
612 sysfs_remove_group(dom0_kobj
, &dev_attr_grp
);
613 kobject_put(dom0_kobj
);
614 misc_deregister(&dom0_dev
.miscdev
);
616 XEN_PRINT("####### DPDK Xen Dom0 module unloaded #######\n");
/*
 * open() handler for /dev/dom0_mm.
 *
 * Starts the file with no memory context attached (private_data = NULL); a
 * context is attached later by the PREPARE_MEMSEG or ATTACH_TO_MEMSEG
 * ioctl. The return statement is on a line not shown here.
 */
620 dom0_open(struct inode
*inode
, struct file
*file
)
622 file
->private_data
= NULL
;
624 XEN_PRINT(KERN_INFO
"/dev/dom0_mm opened\n");
/*
 * release() handler for /dev/dom0_mm.
 *
 * Under dom0_dev.data_lock, drops one reference on the memory context
 * attached to this file and releases the context when the count reaches
 * zero. The file's private_data is then cleared.
 *
 * NOTE(review): private_data is NULL for a file that never issued
 * PREPARE/ATTACH; confirm the line not shown here guards against that
 * before mm_data->refcnt is touched.
 */
629 dom0_release(struct inode
*inode
, struct file
*file
)
632 struct dom0_mm_data
*mm_data
= file
->private_data
;
637 mutex_lock(&dom0_dev
.data_lock
);
/* The last user returns the blocks to the pool. */
638 if (--mm_data
->refcnt
== 0)
639 ret
= dom0_memory_release(mm_data
);
640 mutex_unlock(&dom0_dev
.data_lock
);
642 file
->private_data
= NULL
;
643 XEN_PRINT(KERN_INFO
"/dev/dom0_mm closed\n");
/*
 * mmap() handler: map one memory segment of the attached context into the
 * caller's address space.
 *
 * The mmap page offset (vm_pgoff) is interpreted as a segment index, not a
 * byte offset. Under dom0_dev.data_lock the index must be below
 * mm_data->num_memseg and the mapping length must not exceed that segment's
 * size; the segment's start pfn is then mapped over the whole VMA with
 * PAGE_SHARED protection.
 *
 * The error codes returned on the rejected paths and the final return are
 * on lines not shown here.
 *
 * NOTE(review): mm_data is NULL until PREPARE/ATTACH has run on this file;
 * confirm a guard exists on the lines that are not shown here.
 */
648 dom0_mmap(struct file
*file
, struct vm_area_struct
*vm
)
651 uint32_t idx
= vm
->vm_pgoff
;
652 uint64_t pfn
, size
= vm
->vm_end
- vm
->vm_start
;
653 struct dom0_mm_data
*mm_data
= file
->private_data
;
658 mutex_lock(&dom0_dev
.data_lock
);
/* Reject a segment index that is out of range. */
659 if (idx
>= mm_data
->num_memseg
) {
660 mutex_unlock(&dom0_dev
.data_lock
);
/* Reject a mapping longer than the segment. */
664 if (size
> mm_data
->seg_info
[idx
].size
){
665 mutex_unlock(&dom0_dev
.data_lock
);
669 XEN_PRINT("mmap memseg idx =%d,size = 0x%llx\n", idx
, size
);
671 pfn
= mm_data
->seg_info
[idx
].pfn
;
672 mutex_unlock(&dom0_dev
.data_lock
);
/* The remap itself runs after the lock has been dropped. */
674 status
= remap_pfn_range(vm
, vm
->vm_start
, pfn
, size
, PAGE_SHARED
);
/*
 * ioctl handler for /dev/dom0_mm. Dispatches on _IOC_NR(ioctl_num):
 *
 *   PREPARE_MEMSEG    - copy a struct memory_info from user space, allocate
 *                       a fresh context, validate and build it under
 *                       dom0_dev.data_lock, and attach it to this file.
 *                       Refused if the file already has a context.
 *   ATTACH_TO_MEMSEG  - copy a name from user space and attach the existing
 *                       context with that name to this file.
 *   GET_NUM_MEMSEG    - copy mm_data->num_memseg to user space.
 *   GET_MEMSEG_INFO   - copy num_memseg entries of mm_data->seg_info to
 *                       user space.
 *
 * The error returns, the cleanup on failed PREPARE paths, the reference
 * counting on ATTACH and the final return are on lines not shown here.
 */
679 dom0_ioctl(struct file
*file
,
680 unsigned int ioctl_num
,
681 unsigned long ioctl_param
)
/* Zero-filled so a short user-supplied name is NUL-padded. */
684 char name
[DOM0_NAME_MAX
] = {0};
685 struct memory_info meminfo
;
/* NULL until PREPARE_MEMSEG or ATTACH_TO_MEMSEG has succeeded on this file. */
686 struct dom0_mm_data
*mm_data
= file
->private_data
;
688 XEN_PRINT("IOCTL num=0x%0x param=0x%0lx \n", ioctl_num
, ioctl_param
);
691 * Switch according to the ioctl called
693 switch _IOC_NR(ioctl_num
) {
694 case _IOC_NR(RTE_DOM0_IOCTL_PREPARE_MEMSEG
):
695 ret
= copy_from_user(&meminfo
, (void *)ioctl_param
,
696 sizeof(struct memory_info
));
/* One context per file descriptor. */
700 if (mm_data
!= NULL
) {
701 XEN_ERR("Cannot create memory segment for the same"
702 " file descriptor\n");
706 /* Allocate private data */
707 mm_data
= vmalloc(sizeof(struct dom0_mm_data
));
709 XEN_ERR("Unable to allocate device private data\n");
712 memset(mm_data
, 0, sizeof(struct dom0_mm_data
));
714 mutex_lock(&dom0_dev
.data_lock
);
715 /* check if we can allocate memory*/
716 if (dom0_check_memory(&meminfo
) < 0) {
717 mutex_unlock(&dom0_dev
.data_lock
);
722 /* allocate memory and created memory segments*/
723 if (dom0_prepare_memsegs(&meminfo
, mm_data
) < 0) {
724 XEN_ERR("create memory segment fail.\n");
725 mutex_unlock(&dom0_dev
.data_lock
);
729 file
->private_data
= mm_data
;
730 mutex_unlock(&dom0_dev
.data_lock
);
733 /* support multiple process in term of memory mapping*/
/*
 * NOTE(review): unlike PREPARE, no check that this file already has a
 * context is visible; attaching twice would overwrite private_data -
 * confirm.
 */
734 case _IOC_NR(RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG
):
735 ret
= copy_from_user(name
, (void *)ioctl_param
,
736 sizeof(char) * DOM0_NAME_MAX
);
740 mutex_lock(&dom0_dev
.data_lock
);
741 idx
= dom0_find_memdata(name
);
743 mutex_unlock(&dom0_dev
.data_lock
);
747 mm_data
= dom0_dev
.mm_data
[idx
];
749 file
->private_data
= mm_data
;
750 mutex_unlock(&dom0_dev
.data_lock
);
/*
 * NOTE(review): the two GET cases dereference mm_data, which is NULL when
 * neither PREPARE nor ATTACH has run on this file, and no NULL check is
 * visible. They also read the context without taking data_lock - confirm.
 */
753 case _IOC_NR(RTE_DOM0_IOCTL_GET_NUM_MEMSEG
):
754 ret
= copy_to_user((void *)ioctl_param
, &mm_data
->num_memseg
,
760 case _IOC_NR(RTE_DOM0_IOCTL_GET_MEMSEG_INFO
):
761 ret
= copy_to_user((void *)ioctl_param
,
762 &mm_data
->seg_info
[0],
763 sizeof(struct memseg_info
) *
764 mm_data
->num_memseg
);
769 XEN_PRINT("IOCTL default \n");
776 module_init(dom0_init
);
777 module_exit(dom0_exit
);
779 module_param(rsv_memsize
, uint
, S_IRUGO
| S_IWUSR
);
780 MODULE_PARM_DESC(rsv_memsize
, "Xen-dom0 reserved memory size(MB).\n");