// SPDX-License-Identifier: GPL-2.0
/*
 * This is a module to test the HMM (Heterogeneous Memory Management)
 * mirror and zone device private memory migration APIs of the kernel.
 * Userspace programs can register with the driver to mirror their own address
 * space and can use the device to read/write any valid virtual address.
 */
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/mutex.h>
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/delay.h>
#include <linux/pagemap.h>
#include <linux/hmm.h>
#include <linux/vmalloc.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/sched/mm.h>
#include <linux/platform_device.h>

#include "test_hmm_uapi.h"

#define DMIRROR_NDEVICES		2
#define DMIRROR_RANGE_FAULT_TIMEOUT	1000
#define DEVMEM_CHUNK_SIZE		(256 * 1024 * 1024U)
#define DEVMEM_CHUNKS_RESERVE		16

static const struct dev_pagemap_ops dmirror_devmem_ops;
static const struct mmu_interval_notifier_ops dmirror_min_ops;
static dev_t dmirror_dev;
static struct page *dmirror_zero_page;

struct dmirror_device;

struct dmirror_bounce {
	void			*ptr;
	unsigned long		size;
	unsigned long		addr;
	unsigned long		cpages;
};

#define DPT_XA_TAG_WRITE 3UL

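/*
 * How the mirror "page table" works (summary added for clarity): dmirror->pt
 * is an xarray indexed by virtual PFN (vaddr >> PAGE_SHIFT). Each entry is a
 * struct page pointer, optionally tagged with DPT_XA_TAG_WRITE when the
 * mirrored mapping is writable, e.g.:
 *
 *	entry = xa_tag_pointer(page, DPT_XA_TAG_WRITE);
 *	xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
 *	...
 *	page = xa_untag_pointer(xa_load(&dmirror->pt, pfn));
 *
 * xa_tag_pointer()/xa_untag_pointer()/xa_pointer_tag() keep the tag in the
 * low bits of the pointer, so no extra memory is needed per entry.
 */
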
/*
 * Data structure to track address ranges and register for mmu interval
 * notifier updates.
 */
struct dmirror_interval {
	struct mmu_interval_notifier	notifier;
	struct dmirror			*dmirror;
};

/*
 * Data attached to the open device file.
 * Note that it might be shared after a fork().
 */
struct dmirror {
	struct dmirror_device		*mdevice;
	struct xarray			pt;
	struct mmu_interval_notifier	notifier;
	struct mutex			mutex;
};

/*
 * ZONE_DEVICE pages for migration and simulating device memory.
 */
struct dmirror_chunk {
	struct dev_pagemap	pagemap;
	struct dmirror_device	*mdevice;
};

struct dmirror_device {
	struct cdev		cdevice;
	struct hmm_devmem	*devmem;

	unsigned int		devmem_capacity;
	unsigned int		devmem_count;
	struct dmirror_chunk	**devmem_chunks;
	struct mutex		devmem_lock;	/* protects the above */

	struct page		*free_pages;
	spinlock_t		lock;		/* protects the above */
};

static struct dmirror_device dmirror_devices[DMIRROR_NDEVICES];

static int dmirror_bounce_init(struct dmirror_bounce *bounce,
			       unsigned long addr,
			       unsigned long size)
{
	bounce->addr = addr;
	bounce->size = size;
	bounce->cpages = 0;
	bounce->ptr = vmalloc(size);
	if (!bounce->ptr)
		return -ENOMEM;
	return 0;
}

static void dmirror_bounce_fini(struct dmirror_bounce *bounce)
{
	vfree(bounce->ptr);
}

static int dmirror_fops_open(struct inode *inode, struct file *filp)
{
	struct cdev *cdev = inode->i_cdev;
	struct dmirror *dmirror;
	int ret;

	/* Mirror this process address space */
	dmirror = kzalloc(sizeof(*dmirror), GFP_KERNEL);
	if (dmirror == NULL)
		return -ENOMEM;

	dmirror->mdevice = container_of(cdev, struct dmirror_device, cdevice);
	mutex_init(&dmirror->mutex);
	xa_init(&dmirror->pt);

	ret = mmu_interval_notifier_insert(&dmirror->notifier, current->mm,
				0, ULONG_MAX & PAGE_MASK, &dmirror_min_ops);
	if (ret) {
		kfree(dmirror);
		return ret;
	}

	filp->private_data = dmirror;
	return 0;
}

static int dmirror_fops_release(struct inode *inode, struct file *filp)
{
	struct dmirror *dmirror = filp->private_data;

	mmu_interval_notifier_remove(&dmirror->notifier);
	xa_destroy(&dmirror->pt);
	kfree(dmirror);
	return 0;
}

static struct dmirror_device *dmirror_page_to_device(struct page *page)
{
	return container_of(page->pgmap, struct dmirror_chunk,
			    pagemap)->mdevice;
}

static int dmirror_do_fault(struct dmirror *dmirror, struct hmm_range *range)
{
	unsigned long *pfns = range->hmm_pfns;
	unsigned long pfn;

	for (pfn = (range->start >> PAGE_SHIFT);
	     pfn < (range->end >> PAGE_SHIFT);
	     pfn++, pfns++) {
		struct page *page;
		void *entry;

		/*
		 * Since we asked for hmm_range_fault() to populate pages,
		 * it shouldn't return an error entry on success.
		 */
		WARN_ON(*pfns & HMM_PFN_ERROR);
		WARN_ON(!(*pfns & HMM_PFN_VALID));

		page = hmm_pfn_to_page(*pfns);

		entry = page;
		if (*pfns & HMM_PFN_WRITE)
			entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE);
		else if (WARN_ON(range->default_flags & HMM_PFN_WRITE))
			return -EFAULT;
		entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
		if (xa_is_err(entry))
			return xa_err(entry);
	}

	return 0;
}

static void dmirror_do_update(struct dmirror *dmirror, unsigned long start,
			      unsigned long end)
{
	unsigned long pfn;
	void *entry;

	/*
	 * The XArray doesn't hold references to pages since it relies on
	 * the mmu notifier to clear page pointers when they become stale.
	 * Therefore, it is OK to just clear the entry.
	 */
	xa_for_each_range(&dmirror->pt, pfn, entry, start >> PAGE_SHIFT,
			  end >> PAGE_SHIFT)
		xa_erase(&dmirror->pt, pfn);
}

static bool dmirror_interval_invalidate(struct mmu_interval_notifier *mni,
				const struct mmu_notifier_range *range,
				unsigned long cur_seq)
{
	struct dmirror *dmirror = container_of(mni, struct dmirror, notifier);

	if (mmu_notifier_range_blockable(range))
		mutex_lock(&dmirror->mutex);
	else if (!mutex_trylock(&dmirror->mutex))
		return false;

	mmu_interval_set_seq(mni, cur_seq);
	dmirror_do_update(dmirror, range->start, range->end);

	mutex_unlock(&dmirror->mutex);
	return true;
}

static const struct mmu_interval_notifier_ops dmirror_min_ops = {
	.invalidate = dmirror_interval_invalidate,
};

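/*
 * Note on the fault and snapshot paths below: they follow the usual
 * mmu_interval_notifier retry protocol. Sample the sequence count with
 * mmu_interval_read_begin(), call hmm_range_fault() with the mmap lock held
 * for read, then take the driver mutex and check mmu_interval_read_retry().
 * If an invalidation raced with the fault, the sequence count has changed and
 * the whole operation is retried until it succeeds or the
 * HMM_RANGE_DEFAULT_TIMEOUT deadline expires.
 */
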
static int dmirror_range_fault(struct dmirror *dmirror,
			       struct hmm_range *range)
{
	struct mm_struct *mm = dmirror->notifier.mm;
	unsigned long timeout =
		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
	int ret;

	while (true) {
		if (time_after(jiffies, timeout)) {
			ret = -EBUSY;
			goto out;
		}

		range->notifier_seq = mmu_interval_read_begin(range->notifier);
		mmap_read_lock(mm);
		ret = hmm_range_fault(range);
		mmap_read_unlock(mm);
		if (ret) {
			if (ret == -EBUSY)
				continue;
			goto out;
		}

		mutex_lock(&dmirror->mutex);
		if (mmu_interval_read_retry(range->notifier,
					    range->notifier_seq)) {
			mutex_unlock(&dmirror->mutex);
			continue;
		}
		break;
	}

	ret = dmirror_do_fault(dmirror, range);

	mutex_unlock(&dmirror->mutex);
out:
	return ret;
}

static int dmirror_fault(struct dmirror *dmirror, unsigned long start,
			 unsigned long end, bool write)
{
	struct mm_struct *mm = dmirror->notifier.mm;
	unsigned long addr;
	unsigned long pfns[64];
	struct hmm_range range = {
		.notifier = &dmirror->notifier,
		.hmm_pfns = pfns,
		.default_flags =
			HMM_PFN_REQ_FAULT | (write ? HMM_PFN_REQ_WRITE : 0),
		.dev_private_owner = dmirror->mdevice,
	};
	int ret = 0;

	/* Since the mm is for the mirrored process, get a reference first. */
	if (!mmget_not_zero(mm))
		return 0;

	/* Fault in the range in chunks of at most ARRAY_SIZE(pfns) pages. */
	for (addr = start; addr < end; addr = range.end) {
		range.start = addr;
		range.end = min(addr + (ARRAY_SIZE(pfns) << PAGE_SHIFT), end);

		ret = dmirror_range_fault(dmirror, &range);
		if (ret)
			break;
	}

	mmput(mm);
	return ret;
}

static int dmirror_do_read(struct dmirror *dmirror, unsigned long start,
			   unsigned long end, struct dmirror_bounce *bounce)
{
	unsigned long pfn;
	void *ptr;

	ptr = bounce->ptr + ((start - bounce->addr) & PAGE_MASK);

	for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++) {
		void *entry;
		struct page *page;
		void *tmp;

		entry = xa_load(&dmirror->pt, pfn);
		page = xa_untag_pointer(entry);
		if (!page)
			return -ENOENT;

		tmp = kmap(page);
		memcpy(ptr, tmp, PAGE_SIZE);
		kunmap(page);

		ptr += PAGE_SIZE;
		bounce->cpages++;
	}

	return 0;
}

static int dmirror_read(struct dmirror *dmirror, struct hmm_dmirror_cmd *cmd)
{
	struct dmirror_bounce bounce;
	unsigned long start, end;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	int ret;

	start = cmd->addr;
	end = start + size;
	if (end < start)
		return -EINVAL;

	ret = dmirror_bounce_init(&bounce, start, size);
	if (ret)
		return ret;

	while (1) {
		mutex_lock(&dmirror->mutex);
		ret = dmirror_do_read(dmirror, start, end, &bounce);
		mutex_unlock(&dmirror->mutex);
		if (ret != -ENOENT)
			break;

		start = cmd->addr + (bounce.cpages << PAGE_SHIFT);
		ret = dmirror_fault(dmirror, start, end, false);
		if (ret)
			break;
		cmd->faults++;
	}

	if (ret == 0) {
		if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr,
				 bounce.size))
			ret = -EFAULT;
	}
	cmd->cpages = bounce.cpages;
	dmirror_bounce_fini(&bounce);
	return ret;
}

static int dmirror_do_write(struct dmirror *dmirror, unsigned long start,
			    unsigned long end, struct dmirror_bounce *bounce)
{
	unsigned long pfn;
	void *ptr;

	ptr = bounce->ptr + ((start - bounce->addr) & PAGE_MASK);

	for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++) {
		void *entry;
		struct page *page;
		void *tmp;

		entry = xa_load(&dmirror->pt, pfn);
		page = xa_untag_pointer(entry);
		if (!page || xa_pointer_tag(entry) != DPT_XA_TAG_WRITE)
			return -ENOENT;

		tmp = kmap(page);
		memcpy(tmp, ptr, PAGE_SIZE);
		kunmap(page);

		ptr += PAGE_SIZE;
		bounce->cpages++;
	}

	return 0;
}

static int dmirror_write(struct dmirror *dmirror, struct hmm_dmirror_cmd *cmd)
{
	struct dmirror_bounce bounce;
	unsigned long start, end;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	int ret;

	start = cmd->addr;
	end = start + size;
	if (end < start)
		return -EINVAL;

	ret = dmirror_bounce_init(&bounce, start, size);
	if (ret)
		return ret;
	if (copy_from_user(bounce.ptr, u64_to_user_ptr(cmd->ptr),
			   bounce.size)) {
		ret = -EFAULT;
		goto fini;
	}

	while (1) {
		mutex_lock(&dmirror->mutex);
		ret = dmirror_do_write(dmirror, start, end, &bounce);
		mutex_unlock(&dmirror->mutex);
		if (ret != -ENOENT)
			break;

		start = cmd->addr + (bounce.cpages << PAGE_SHIFT);
		ret = dmirror_fault(dmirror, start, end, true);
		if (ret)
			break;
		cmd->faults++;
	}

fini:
	cmd->cpages = bounce.cpages;
	dmirror_bounce_fini(&bounce);
	return ret;
}

static bool dmirror_allocate_chunk(struct dmirror_device *mdevice,
				   struct page **ppage)
{
	struct dmirror_chunk *devmem;
	struct resource *res;
	unsigned long pfn;
	unsigned long pfn_first;
	unsigned long pfn_last;
	void *ptr;

	mutex_lock(&mdevice->devmem_lock);

	if (mdevice->devmem_count == mdevice->devmem_capacity) {
		struct dmirror_chunk **new_chunks;
		unsigned int new_capacity;

		new_capacity = mdevice->devmem_capacity +
				DEVMEM_CHUNKS_RESERVE;
		new_chunks = krealloc(mdevice->devmem_chunks,
				sizeof(new_chunks[0]) * new_capacity,
				GFP_KERNEL);
		if (!new_chunks)
			goto err;
		mdevice->devmem_capacity = new_capacity;
		mdevice->devmem_chunks = new_chunks;
	}

	res = request_free_mem_region(&iomem_resource, DEVMEM_CHUNK_SIZE,
					"hmm_dmirror");
	if (IS_ERR(res))
		goto err;

	devmem = kzalloc(sizeof(*devmem), GFP_KERNEL);
	if (!devmem) {
		release_mem_region(res->start, resource_size(res));
		goto err;
	}

	devmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
	devmem->pagemap.res = *res;
	devmem->pagemap.ops = &dmirror_devmem_ops;
	devmem->pagemap.owner = mdevice;

	ptr = memremap_pages(&devmem->pagemap, numa_node_id());
	if (IS_ERR(ptr))
		goto err_free;

	devmem->mdevice = mdevice;
	pfn_first = devmem->pagemap.res.start >> PAGE_SHIFT;
	pfn_last = pfn_first +
		(resource_size(&devmem->pagemap.res) >> PAGE_SHIFT);
	mdevice->devmem_chunks[mdevice->devmem_count++] = devmem;

	mutex_unlock(&mdevice->devmem_lock);

	pr_info("added new %u MB chunk (total %u chunks, %u MB) PFNs [0x%lx 0x%lx)\n",
		DEVMEM_CHUNK_SIZE / (1024 * 1024),
		mdevice->devmem_count,
		mdevice->devmem_count * (DEVMEM_CHUNK_SIZE / (1024 * 1024)),
		pfn_first, pfn_last);

	spin_lock(&mdevice->lock);
	for (pfn = pfn_first; pfn < pfn_last; pfn++) {
		struct page *page = pfn_to_page(pfn);

		page->zone_device_data = mdevice->free_pages;
		mdevice->free_pages = page;
	}
	if (ppage) {
		*ppage = mdevice->free_pages;
		mdevice->free_pages = (*ppage)->zone_device_data;
	}
	spin_unlock(&mdevice->lock);

	return true;

err_free:
	release_mem_region(devmem->pagemap.res.start,
			   resource_size(&devmem->pagemap.res));
	kfree(devmem);
err:
	mutex_unlock(&mdevice->devmem_lock);
	return false;
}

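/*
 * Free device private pages are kept on a singly linked list threaded through
 * page->zone_device_data: dmirror_allocate_chunk() pushes every page of a new
 * chunk onto mdevice->free_pages, and dmirror_devmem_alloc_page() pops from
 * the head under mdevice->lock. Once a page is handed out, zone_device_data
 * is reused to point at the backing system memory page instead.
 */
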
static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
{
	struct page *dpage = NULL;
	struct page *rpage;

	/*
	 * This is a fake device so we alloc real system memory to store
	 * our device memory.
	 */
	rpage = alloc_page(GFP_HIGHUSER);
	if (!rpage)
		return NULL;

	spin_lock(&mdevice->lock);

	if (mdevice->free_pages) {
		dpage = mdevice->free_pages;
		mdevice->free_pages = dpage->zone_device_data;
		spin_unlock(&mdevice->lock);
	} else {
		spin_unlock(&mdevice->lock);
		if (!dmirror_allocate_chunk(mdevice, &dpage))
			goto error;
	}

	dpage->zone_device_data = rpage;
	get_page(dpage);
	lock_page(dpage);
	return dpage;

error:
	__free_page(rpage);
	return NULL;
}

static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args,
					   struct dmirror *dmirror)
{
	struct dmirror_device *mdevice = dmirror->mdevice;
	const unsigned long *src = args->src;
	unsigned long *dst = args->dst;
	unsigned long addr;

	for (addr = args->start; addr < args->end; addr += PAGE_SIZE,
						   src++, dst++) {
		struct page *spage;
		struct page *dpage;
		struct page *rpage;

		if (!(*src & MIGRATE_PFN_MIGRATE))
			continue;

		/*
		 * Note that spage might be NULL which is OK since it is an
		 * unallocated pte_none() or read-only zero page.
		 */
		spage = migrate_pfn_to_page(*src);

		/*
		 * Don't migrate device private pages from our own driver or
		 * others. For our own we would do a device private memory copy
		 * not a migration and for others, we would need to fault the
		 * other device's page into system memory first.
		 */
		if (spage && is_zone_device_page(spage))
			continue;

		dpage = dmirror_devmem_alloc_page(mdevice);
		if (!dpage)
			continue;

		rpage = dpage->zone_device_data;
		if (spage)
			copy_highpage(rpage, spage);
		else
			clear_highpage(rpage);

		/*
		 * Normally, a device would use the page->zone_device_data to
		 * point to the mirror but here we use it to hold the page for
		 * the simulated device memory and that page holds the pointer
		 * to the mirror.
		 */
		rpage->zone_device_data = dmirror;

		*dst = migrate_pfn(page_to_pfn(dpage)) |
			    MIGRATE_PFN_LOCKED;
		if ((*src & MIGRATE_PFN_WRITE) ||
		    (!spage && args->vma->vm_flags & VM_WRITE))
			*dst |= MIGRATE_PFN_WRITE;
	}
}

static int dmirror_migrate_finalize_and_map(struct migrate_vma *args,
					    struct dmirror *dmirror)
{
	unsigned long start = args->start;
	unsigned long end = args->end;
	const unsigned long *src = args->src;
	const unsigned long *dst = args->dst;
	unsigned long pfn;

	/* Map the migrated pages into the device's page tables. */
	mutex_lock(&dmirror->mutex);

	for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++,
								src++, dst++) {
		struct page *dpage;
		void *entry;

		if (!(*src & MIGRATE_PFN_MIGRATE))
			continue;

		dpage = migrate_pfn_to_page(*dst);
		if (!dpage)
			continue;

		/*
		 * Store the page that holds the data so the page table
		 * doesn't have to deal with ZONE_DEVICE private pages.
		 */
		entry = dpage->zone_device_data;
		if (*dst & MIGRATE_PFN_WRITE)
			entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE);
		entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
		if (xa_is_err(entry)) {
			mutex_unlock(&dmirror->mutex);
			return xa_err(entry);
		}
	}

	mutex_unlock(&dmirror->mutex);
	return 0;
}

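/*
 * dmirror_migrate() below drives the standard migrate_vma sequence for
 * moving system memory into device private memory:
 *	migrate_vma_setup()                - collect and isolate source pages
 *	dmirror_migrate_alloc_and_copy()   - allocate device pages, copy data
 *	migrate_vma_pages()                - swap the CPU page table entries
 *	dmirror_migrate_finalize_and_map() - update the device's mirror
 *	migrate_vma_finalize()             - release the source pages
 */
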
static int dmirror_migrate(struct dmirror *dmirror,
			   struct hmm_dmirror_cmd *cmd)
{
	unsigned long start, end, addr;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	struct mm_struct *mm = dmirror->notifier.mm;
	struct vm_area_struct *vma;
	unsigned long src_pfns[64];
	unsigned long dst_pfns[64];
	struct dmirror_bounce bounce;
	struct migrate_vma args;
	unsigned long next;
	int ret;

	start = cmd->addr;
	end = start + size;
	if (end < start)
		return -EINVAL;

	/* Since the mm is for the mirrored process, get a reference first. */
	if (!mmget_not_zero(mm))
		return -EINVAL;

	mmap_read_lock(mm);
	for (addr = start; addr < end; addr = next) {
		vma = find_vma(mm, addr);
		if (!vma || addr < vma->vm_start ||
		    !(vma->vm_flags & VM_READ)) {
			ret = -EINVAL;
			goto out;
		}
		next = min(end, addr + (ARRAY_SIZE(src_pfns) << PAGE_SHIFT));
		if (next > vma->vm_end)
			next = vma->vm_end;

		args.vma = vma;
		args.src = src_pfns;
		args.dst = dst_pfns;
		args.start = addr;
		args.end = next;
		args.src_owner = NULL;
		ret = migrate_vma_setup(&args);
		if (ret)
			goto out;

		dmirror_migrate_alloc_and_copy(&args, dmirror);
		migrate_vma_pages(&args);
		dmirror_migrate_finalize_and_map(&args, dmirror);
		migrate_vma_finalize(&args);
	}
	mmap_read_unlock(mm);
	mmput(mm);

	/* Return the migrated data for verification. */
	ret = dmirror_bounce_init(&bounce, start, size);
	if (ret)
		return ret;
	mutex_lock(&dmirror->mutex);
	ret = dmirror_do_read(dmirror, start, end, &bounce);
	mutex_unlock(&dmirror->mutex);
	if (ret == 0) {
		if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr,
				 bounce.size))
			ret = -EFAULT;
	}
	cmd->cpages = bounce.cpages;
	dmirror_bounce_fini(&bounce);
	return ret;

out:
	mmap_read_unlock(mm);
	mmput(mm);
	return ret;
}

static void dmirror_mkentry(struct dmirror *dmirror, struct hmm_range *range,
			    unsigned char *perm, unsigned long entry)
{
	struct page *page;

	if (entry & HMM_PFN_ERROR) {
		*perm = HMM_DMIRROR_PROT_ERROR;
		return;
	}
	if (!(entry & HMM_PFN_VALID)) {
		*perm = HMM_DMIRROR_PROT_NONE;
		return;
	}

	page = hmm_pfn_to_page(entry);
	if (is_device_private_page(page)) {
		/* Is the page migrated to this device or some other? */
		if (dmirror->mdevice == dmirror_page_to_device(page))
			*perm = HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL;
		else
			*perm = HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE;
	} else if (is_zero_pfn(page_to_pfn(page)))
		*perm = HMM_DMIRROR_PROT_ZERO;
	else
		*perm = HMM_DMIRROR_PROT_NONE;
	if (entry & HMM_PFN_WRITE)
		*perm |= HMM_DMIRROR_PROT_WRITE;
	else
		*perm |= HMM_DMIRROR_PROT_READ;
}

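/*
 * dmirror_mkentry() encodes the state of one page as a single byte using the
 * HMM_DMIRROR_PROT_* values from test_hmm_uapi.h; dmirror_snapshot() below
 * copies one such byte per page back to userspace.
 */
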
static bool dmirror_snapshot_invalidate(struct mmu_interval_notifier *mni,
				const struct mmu_notifier_range *range,
				unsigned long cur_seq)
{
	struct dmirror_interval *dmi =
		container_of(mni, struct dmirror_interval, notifier);
	struct dmirror *dmirror = dmi->dmirror;

	if (mmu_notifier_range_blockable(range))
		mutex_lock(&dmirror->mutex);
	else if (!mutex_trylock(&dmirror->mutex))
		return false;

	/*
	 * Snapshots only need to set the sequence number since any
	 * invalidation in the interval invalidates the whole snapshot.
	 */
	mmu_interval_set_seq(mni, cur_seq);

	mutex_unlock(&dmirror->mutex);
	return true;
}

static const struct mmu_interval_notifier_ops dmirror_mrn_ops = {
	.invalidate = dmirror_snapshot_invalidate,
};

static int dmirror_range_snapshot(struct dmirror *dmirror,
				  struct hmm_range *range,
				  unsigned char *perm)
{
	struct mm_struct *mm = dmirror->notifier.mm;
	struct dmirror_interval notifier;
	unsigned long timeout =
		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
	unsigned long i;
	unsigned long n;
	int ret = 0;

	notifier.dmirror = dmirror;
	range->notifier = &notifier.notifier;

	ret = mmu_interval_notifier_insert(range->notifier, mm,
			range->start, range->end - range->start,
			&dmirror_mrn_ops);
	if (ret)
		return ret;

	while (true) {
		if (time_after(jiffies, timeout)) {
			ret = -EBUSY;
			goto out;
		}

		range->notifier_seq = mmu_interval_read_begin(range->notifier);

		mmap_read_lock(mm);
		ret = hmm_range_fault(range);
		mmap_read_unlock(mm);
		if (ret) {
			if (ret == -EBUSY)
				continue;
			goto out;
		}

		mutex_lock(&dmirror->mutex);
		if (mmu_interval_read_retry(range->notifier,
					    range->notifier_seq)) {
			mutex_unlock(&dmirror->mutex);
			continue;
		}
		break;
	}

	n = (range->end - range->start) >> PAGE_SHIFT;
	for (i = 0; i < n; i++)
		dmirror_mkentry(dmirror, range, perm + i, range->hmm_pfns[i]);

	mutex_unlock(&dmirror->mutex);
out:
	mmu_interval_notifier_remove(range->notifier);
	return ret;
}

static int dmirror_snapshot(struct dmirror *dmirror,
			    struct hmm_dmirror_cmd *cmd)
{
	struct mm_struct *mm = dmirror->notifier.mm;
	unsigned long start, end;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	unsigned long addr;
	unsigned long next;
	unsigned long pfns[64];
	unsigned char perm[64];
	char __user *uptr;
	struct hmm_range range = {
		.hmm_pfns = pfns,
		.dev_private_owner = dmirror->mdevice,
	};
	int ret = 0;

	start = cmd->addr;
	end = start + size;
	if (end < start)
		return -EINVAL;

	/* Since the mm is for the mirrored process, get a reference first. */
	if (!mmget_not_zero(mm))
		return -EINVAL;

	/*
	 * Register a temporary notifier to detect invalidations even if it
	 * overlaps with other mmu_interval_notifiers.
	 */
	uptr = u64_to_user_ptr(cmd->ptr);
	for (addr = start; addr < end; addr = next) {
		unsigned long n;

		next = min(addr + (ARRAY_SIZE(pfns) << PAGE_SHIFT), end);
		range.start = addr;
		range.end = next;

		ret = dmirror_range_snapshot(dmirror, &range, perm);
		if (ret)
			break;

		n = (range.end - range.start) >> PAGE_SHIFT;
		if (copy_to_user(uptr, perm, n)) {
			ret = -EFAULT;
			break;
		}

		cmd->cpages += n;
		uptr += n;
	}
	mmput(mm);

	return ret;
}

static long dmirror_fops_unlocked_ioctl(struct file *filp,
					unsigned int command,
					unsigned long arg)
{
	void __user *uarg = (void __user *)arg;
	struct hmm_dmirror_cmd cmd;
	struct dmirror *dmirror;
	int ret;

	dmirror = filp->private_data;
	if (!dmirror)
		return -EINVAL;

	if (copy_from_user(&cmd, uarg, sizeof(cmd)))
		return -EFAULT;

	if (cmd.addr & ~PAGE_MASK)
		return -EINVAL;
	if (cmd.addr >= (cmd.addr + (cmd.npages << PAGE_SHIFT)))
		return -EINVAL;

	cmd.cpages = 0;
	cmd.faults = 0;

	switch (command) {
	case HMM_DMIRROR_READ:
		ret = dmirror_read(dmirror, &cmd);
		break;

	case HMM_DMIRROR_WRITE:
		ret = dmirror_write(dmirror, &cmd);
		break;

	case HMM_DMIRROR_MIGRATE:
		ret = dmirror_migrate(dmirror, &cmd);
		break;

	case HMM_DMIRROR_SNAPSHOT:
		ret = dmirror_snapshot(dmirror, &cmd);
		break;

	default:
		return -EINVAL;
	}
	if (ret)
		return ret;

	if (copy_to_user(uarg, &cmd, sizeof(cmd)))
		return -EFAULT;

	return 0;
}

static const struct file_operations dmirror_fops = {
	.open		= dmirror_fops_open,
	.release	= dmirror_fops_release,
	.unlocked_ioctl = dmirror_fops_unlocked_ioctl,
	.llseek		= default_llseek,
	.owner		= THIS_MODULE,
};

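/*
 * Illustrative userspace usage (sketch only, not part of the driver; the
 * device node path depends on how the test setup creates it, e.g. the HMM
 * selftests use /dev/hmm_dmirror0 and /dev/hmm_dmirror1):
 *
 *	struct hmm_dmirror_cmd cmd = { 0 };
 *	char buf[4096];
 *	int fd = open("/dev/hmm_dmirror0", O_RDWR);
 *
 *	cmd.addr = (unsigned long)some_page_aligned_buffer;
 *	cmd.ptr = (unsigned long)buf;
 *	cmd.npages = 1;
 *	ioctl(fd, HMM_DMIRROR_READ, &cmd);	// mirror and read one page
 *
 * On return, cmd.cpages and cmd.faults report how many pages were processed
 * and how many device faults were needed.
 */
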
static void dmirror_devmem_free(struct page *page)
{
	struct page *rpage = page->zone_device_data;
	struct dmirror_device *mdevice;

	if (rpage)
		__free_page(rpage);

	mdevice = dmirror_page_to_device(page);

	spin_lock(&mdevice->lock);
	page->zone_device_data = mdevice->free_pages;
	mdevice->free_pages = page;
	spin_unlock(&mdevice->lock);
}

static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args,
						   struct dmirror_device *mdevice)
{
	const unsigned long *src = args->src;
	unsigned long *dst = args->dst;
	unsigned long start = args->start;
	unsigned long end = args->end;
	unsigned long addr;

	for (addr = start; addr < end; addr += PAGE_SIZE,
				       src++, dst++) {
		struct page *dpage, *spage;

		spage = migrate_pfn_to_page(*src);
		if (!spage || !(*src & MIGRATE_PFN_MIGRATE))
			continue;
		spage = spage->zone_device_data;

		dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr);
		if (!dpage)
			return VM_FAULT_OOM;

		lock_page(dpage);
		copy_highpage(dpage, spage);
		*dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
		if (*src & MIGRATE_PFN_WRITE)
			*dst |= MIGRATE_PFN_WRITE;
	}
	return 0;
}

static void dmirror_devmem_fault_finalize_and_map(struct migrate_vma *args,
						  struct dmirror *dmirror)
{
	/* Invalidate the device's page table mapping. */
	mutex_lock(&dmirror->mutex);
	dmirror_do_update(dmirror, args->start, args->end);
	mutex_unlock(&dmirror->mutex);
}

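/*
 * dmirror_devmem_fault() below is the dev_pagemap_ops->migrate_to_ram
 * callback: it runs when the CPU touches a device private PTE. It migrates
 * the single faulting page back to system memory using the same
 * migrate_vma_setup()/pages()/finalize() sequence as the migrate path above,
 * copying the data out of the simulated device page, then invalidates the
 * device's mirror for that range.
 */
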
static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf)
{
	struct migrate_vma args;
	unsigned long src_pfns;
	unsigned long dst_pfns;
	struct page *rpage;
	struct dmirror *dmirror;
	vm_fault_t ret;

	/*
	 * Normally, a device would use the page->zone_device_data to point to
	 * the mirror but here we use it to hold the page for the simulated
	 * device memory and that page holds the pointer to the mirror.
	 */
	rpage = vmf->page->zone_device_data;
	dmirror = rpage->zone_device_data;

	/* FIXME demonstrate how we can adjust migrate range */
	args.vma = vmf->vma;
	args.start = vmf->address;
	args.end = args.start + PAGE_SIZE;
	args.src = &src_pfns;
	args.dst = &dst_pfns;
	args.src_owner = dmirror->mdevice;

	if (migrate_vma_setup(&args))
		return VM_FAULT_SIGBUS;

	ret = dmirror_devmem_fault_alloc_and_copy(&args, dmirror->mdevice);
	if (ret)
		return ret;
	migrate_vma_pages(&args);
	dmirror_devmem_fault_finalize_and_map(&args, dmirror);
	migrate_vma_finalize(&args);
	return 0;
}

static const struct dev_pagemap_ops dmirror_devmem_ops = {
	.page_free	= dmirror_devmem_free,
	.migrate_to_ram	= dmirror_devmem_fault,
};

static int dmirror_device_init(struct dmirror_device *mdevice, int id)
{
	dev_t dev;
	int ret;

	dev = MKDEV(MAJOR(dmirror_dev), id);
	mutex_init(&mdevice->devmem_lock);
	spin_lock_init(&mdevice->lock);

	cdev_init(&mdevice->cdevice, &dmirror_fops);
	mdevice->cdevice.owner = THIS_MODULE;
	ret = cdev_add(&mdevice->cdevice, dev, 1);
	if (ret)
		return ret;

	/* Build a list of free ZONE_DEVICE private struct pages */
	dmirror_allocate_chunk(mdevice, NULL);

	return 0;
}

static void dmirror_device_remove(struct dmirror_device *mdevice)
{
	unsigned int i;

	if (mdevice->devmem_chunks) {
		for (i = 0; i < mdevice->devmem_count; i++) {
			struct dmirror_chunk *devmem =
				mdevice->devmem_chunks[i];

			memunmap_pages(&devmem->pagemap);
			release_mem_region(devmem->pagemap.res.start,
					   resource_size(&devmem->pagemap.res));
			kfree(devmem);
		}
		kfree(mdevice->devmem_chunks);
	}

	cdev_del(&mdevice->cdevice);
}

static int __init hmm_dmirror_init(void)
{
	int ret;
	int id;

	ret = alloc_chrdev_region(&dmirror_dev, 0, DMIRROR_NDEVICES,
				  "HMM_DMIRROR");
	if (ret)
		return ret;

	for (id = 0; id < DMIRROR_NDEVICES; id++) {
		ret = dmirror_device_init(dmirror_devices + id, id);
		if (ret)
			goto err_chrdev;
	}

	/*
	 * Allocate a zero page to simulate a reserved page of device private
	 * memory which is always zero. The zero_pfn page isn't used just to
	 * make the code here simpler (i.e., we need a struct page for it).
	 */
	dmirror_zero_page = alloc_page(GFP_HIGHUSER | __GFP_ZERO);
	if (!dmirror_zero_page) {
		ret = -ENOMEM;
		goto err_chrdev;
	}

	pr_info("HMM test module loaded. This is only for testing HMM.\n");
	return 0;

err_chrdev:
	while (--id >= 0)
		dmirror_device_remove(dmirror_devices + id);
	unregister_chrdev_region(dmirror_dev, DMIRROR_NDEVICES);
	return ret;
}

static void __exit hmm_dmirror_exit(void)
{
	int id;

	if (dmirror_zero_page)
		__free_page(dmirror_zero_page);
	for (id = 0; id < DMIRROR_NDEVICES; id++)
		dmirror_device_remove(dmirror_devices + id);
	unregister_chrdev_region(dmirror_dev, DMIRROR_NDEVICES);
}

module_init(hmm_dmirror_init);
module_exit(hmm_dmirror_exit);
MODULE_LICENSE("GPL");