1 // SPDX-License-Identifier: GPL-2.0
/*
 * (C) 2001 Clemson University and The University of Chicago
 *
 * See COPYING in top-level directory.
 */
8 #include "orangefs-kernel.h"
9 #include "orangefs-bufmap.h"
18 static struct slot_map rw_map
= {
20 .q
= __WAIT_QUEUE_HEAD_INITIALIZER(rw_map
.q
)
22 static struct slot_map readdir_map
= {
24 .q
= __WAIT_QUEUE_HEAD_INITIALIZER(readdir_map
.q
)
28 static void install(struct slot_map
*m
, int count
, unsigned long *map
)
30 spin_lock(&m
->q
.lock
);
31 m
->c
= m
->count
= count
;
33 wake_up_all_locked(&m
->q
);
34 spin_unlock(&m
->q
.lock
);
37 static void mark_killed(struct slot_map
*m
)
39 spin_lock(&m
->q
.lock
);
41 spin_unlock(&m
->q
.lock
);
44 static void run_down(struct slot_map
*m
)
47 spin_lock(&m
->q
.lock
);
50 if (likely(list_empty(&wait
.entry
)))
51 __add_wait_queue_entry_tail(&m
->q
, &wait
);
52 set_current_state(TASK_UNINTERRUPTIBLE
);
57 spin_unlock(&m
->q
.lock
);
59 spin_lock(&m
->q
.lock
);
61 __remove_wait_queue(&m
->q
, &wait
);
62 __set_current_state(TASK_RUNNING
);
65 spin_unlock(&m
->q
.lock
);
68 static void put(struct slot_map
*m
, int slot
)
71 spin_lock(&m
->q
.lock
);
72 __clear_bit(slot
, m
->map
);
75 wake_up_locked(&m
->q
);
76 if (unlikely(v
== -1)) /* finished dying */
77 wake_up_all_locked(&m
->q
);
78 spin_unlock(&m
->q
.lock
);
81 static int wait_for_free(struct slot_map
*m
)
83 long left
= slot_timeout_secs
* HZ
;
88 if (likely(list_empty(&wait
.entry
)))
89 __add_wait_queue_entry_tail_exclusive(&m
->q
, &wait
);
90 set_current_state(TASK_INTERRUPTIBLE
);
96 /* we are waiting for map to be installed */
97 /* it would better be there soon, or we go away */
98 if (n
> ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS
* HZ
)
99 n
= ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS
* HZ
;
101 spin_unlock(&m
->q
.lock
);
102 t
= schedule_timeout(n
);
103 spin_lock(&m
->q
.lock
);
104 if (unlikely(!t
) && n
!= left
&& m
->c
< 0)
107 left
= t
+ (left
- n
);
108 if (signal_pending(current
))
112 if (!list_empty(&wait
.entry
))
113 list_del(&wait
.entry
);
114 else if (left
<= 0 && waitqueue_active(&m
->q
))
115 __wake_up_locked_key(&m
->q
, TASK_INTERRUPTIBLE
, NULL
);
116 __set_current_state(TASK_RUNNING
);
118 if (likely(left
> 0))
121 return left
< 0 ? -EINTR
: -ETIMEDOUT
;
124 static int get(struct slot_map
*m
)
127 spin_lock(&m
->q
.lock
);
128 if (unlikely(m
->c
<= 0))
129 res
= wait_for_free(m
);
132 res
= find_first_zero_bit(m
->map
, m
->count
);
133 __set_bit(res
, m
->map
);
135 spin_unlock(&m
->q
.lock
);
139 /* used to describe mapped buffers */
140 struct orangefs_bufmap_desc
{
141 void __user
*uaddr
; /* user space address pointer */
142 struct page
**page_array
; /* array of mapped pages */
143 int array_count
; /* size of above arrays */
144 struct list_head list_link
;
147 static struct orangefs_bufmap
{
154 struct page
**page_array
;
155 struct orangefs_bufmap_desc
*desc_array
;
157 /* array to track usage of buffer descriptors */
158 unsigned long *buffer_index_array
;
160 /* array to track usage of buffer descriptors for readdir */
161 #define N DIV_ROUND_UP(ORANGEFS_READDIR_DEFAULT_DESC_COUNT, BITS_PER_LONG)
162 unsigned long readdir_index_array
[N
];
164 } *__orangefs_bufmap
;
/* Protects __orangefs_bufmap installation/teardown and queries. */
static DEFINE_SPINLOCK(orangefs_bufmap_lock);
169 orangefs_bufmap_unmap(struct orangefs_bufmap
*bufmap
)
171 unpin_user_pages(bufmap
->page_array
, bufmap
->page_count
);
175 orangefs_bufmap_free(struct orangefs_bufmap
*bufmap
)
177 kfree(bufmap
->page_array
);
178 kfree(bufmap
->desc_array
);
179 kfree(bufmap
->buffer_index_array
);
/*
 * XXX: Can the size and shift change while the caller gives up the
 * XXX: lock between calling this and doing something useful?
 */
188 int orangefs_bufmap_size_query(void)
190 struct orangefs_bufmap
*bufmap
;
192 spin_lock(&orangefs_bufmap_lock
);
193 bufmap
= __orangefs_bufmap
;
195 size
= bufmap
->desc_size
;
196 spin_unlock(&orangefs_bufmap_lock
);
200 int orangefs_bufmap_shift_query(void)
202 struct orangefs_bufmap
*bufmap
;
204 spin_lock(&orangefs_bufmap_lock
);
205 bufmap
= __orangefs_bufmap
;
207 shift
= bufmap
->desc_shift
;
208 spin_unlock(&orangefs_bufmap_lock
);
static DECLARE_WAIT_QUEUE_HEAD(bufmap_waitq);
static DECLARE_WAIT_QUEUE_HEAD(readdir_waitq);
215 static struct orangefs_bufmap
*
216 orangefs_bufmap_alloc(struct ORANGEFS_dev_map_desc
*user_desc
)
218 struct orangefs_bufmap
*bufmap
;
220 bufmap
= kzalloc(sizeof(*bufmap
), GFP_KERNEL
);
224 bufmap
->total_size
= user_desc
->total_size
;
225 bufmap
->desc_count
= user_desc
->count
;
226 bufmap
->desc_size
= user_desc
->size
;
227 bufmap
->desc_shift
= ilog2(bufmap
->desc_size
);
229 bufmap
->buffer_index_array
=
230 kzalloc(DIV_ROUND_UP(bufmap
->desc_count
, BITS_PER_LONG
), GFP_KERNEL
);
231 if (!bufmap
->buffer_index_array
)
232 goto out_free_bufmap
;
235 kcalloc(bufmap
->desc_count
, sizeof(struct orangefs_bufmap_desc
),
237 if (!bufmap
->desc_array
)
238 goto out_free_index_array
;
240 bufmap
->page_count
= bufmap
->total_size
/ PAGE_SIZE
;
242 /* allocate storage to track our page mappings */
244 kcalloc(bufmap
->page_count
, sizeof(struct page
*), GFP_KERNEL
);
245 if (!bufmap
->page_array
)
246 goto out_free_desc_array
;
251 kfree(bufmap
->desc_array
);
252 out_free_index_array
:
253 kfree(bufmap
->buffer_index_array
);
261 orangefs_bufmap_map(struct orangefs_bufmap
*bufmap
,
262 struct ORANGEFS_dev_map_desc
*user_desc
)
264 int pages_per_desc
= bufmap
->desc_size
/ PAGE_SIZE
;
265 int offset
= 0, ret
, i
;
268 ret
= pin_user_pages_fast((unsigned long)user_desc
->ptr
,
269 bufmap
->page_count
, FOLL_WRITE
, bufmap
->page_array
);
274 if (ret
!= bufmap
->page_count
) {
275 gossip_err("orangefs error: asked for %d pages, only got %d.\n",
276 bufmap
->page_count
, ret
);
278 for (i
= 0; i
< ret
; i
++) {
279 SetPageError(bufmap
->page_array
[i
]);
280 unpin_user_page(bufmap
->page_array
[i
]);
286 * ideally we want to get kernel space pointers for each page, but
287 * we can't kmap that many pages at once if highmem is being used.
288 * so instead, we just kmap/kunmap the page address each time the
291 for (i
= 0; i
< bufmap
->page_count
; i
++)
292 flush_dcache_page(bufmap
->page_array
[i
]);
294 /* build a list of available descriptors */
295 for (offset
= 0, i
= 0; i
< bufmap
->desc_count
; i
++) {
296 bufmap
->desc_array
[i
].page_array
= &bufmap
->page_array
[offset
];
297 bufmap
->desc_array
[i
].array_count
= pages_per_desc
;
298 bufmap
->desc_array
[i
].uaddr
=
299 (user_desc
->ptr
+ (i
* pages_per_desc
* PAGE_SIZE
));
300 offset
+= pages_per_desc
;
307 * orangefs_bufmap_initialize()
309 * initializes the mapped buffer interface
311 * returns 0 on success, -errno on failure
313 int orangefs_bufmap_initialize(struct ORANGEFS_dev_map_desc
*user_desc
)
315 struct orangefs_bufmap
*bufmap
;
318 gossip_debug(GOSSIP_BUFMAP_DEBUG
,
319 "orangefs_bufmap_initialize: called (ptr ("
320 "%p) sz (%d) cnt(%d).\n",
325 if (user_desc
->total_size
< 0 ||
326 user_desc
->size
< 0 ||
327 user_desc
->count
< 0)
331 * sanity check alignment and size of buffer that caller wants to
334 if (PAGE_ALIGN((unsigned long)user_desc
->ptr
) !=
335 (unsigned long)user_desc
->ptr
) {
336 gossip_err("orangefs error: memory alignment (front). %p\n",
341 if (PAGE_ALIGN(((unsigned long)user_desc
->ptr
+ user_desc
->total_size
))
342 != (unsigned long)(user_desc
->ptr
+ user_desc
->total_size
)) {
343 gossip_err("orangefs error: memory alignment (back).(%p + %d)\n",
345 user_desc
->total_size
);
349 if (user_desc
->total_size
!= (user_desc
->size
* user_desc
->count
)) {
350 gossip_err("orangefs error: user provided an oddly sized buffer: (%d, %d, %d)\n",
351 user_desc
->total_size
,
357 if ((user_desc
->size
% PAGE_SIZE
) != 0) {
358 gossip_err("orangefs error: bufmap size not page size divisible (%d).\n",
364 bufmap
= orangefs_bufmap_alloc(user_desc
);
368 ret
= orangefs_bufmap_map(bufmap
, user_desc
);
370 goto out_free_bufmap
;
373 spin_lock(&orangefs_bufmap_lock
);
374 if (__orangefs_bufmap
) {
375 spin_unlock(&orangefs_bufmap_lock
);
376 gossip_err("orangefs: error: bufmap already initialized.\n");
378 goto out_unmap_bufmap
;
380 __orangefs_bufmap
= bufmap
;
383 bufmap
->buffer_index_array
);
384 install(&readdir_map
,
385 ORANGEFS_READDIR_DEFAULT_DESC_COUNT
,
386 bufmap
->readdir_index_array
);
387 spin_unlock(&orangefs_bufmap_lock
);
389 gossip_debug(GOSSIP_BUFMAP_DEBUG
,
390 "orangefs_bufmap_initialize: exiting normally\n");
394 orangefs_bufmap_unmap(bufmap
);
396 orangefs_bufmap_free(bufmap
);
402 * orangefs_bufmap_finalize()
404 * shuts down the mapped buffer interface and releases any resources
409 void orangefs_bufmap_finalize(void)
411 struct orangefs_bufmap
*bufmap
= __orangefs_bufmap
;
414 gossip_debug(GOSSIP_BUFMAP_DEBUG
, "orangefs_bufmap_finalize: called\n");
415 mark_killed(&rw_map
);
416 mark_killed(&readdir_map
);
417 gossip_debug(GOSSIP_BUFMAP_DEBUG
,
418 "orangefs_bufmap_finalize: exiting normally\n");
421 void orangefs_bufmap_run_down(void)
423 struct orangefs_bufmap
*bufmap
= __orangefs_bufmap
;
427 run_down(&readdir_map
);
428 spin_lock(&orangefs_bufmap_lock
);
429 __orangefs_bufmap
= NULL
;
430 spin_unlock(&orangefs_bufmap_lock
);
431 orangefs_bufmap_unmap(bufmap
);
432 orangefs_bufmap_free(bufmap
);
436 * orangefs_bufmap_get()
438 * gets a free mapped buffer descriptor, will sleep until one becomes
439 * available if necessary
441 * returns slot on success, -errno on failure
443 int orangefs_bufmap_get(void)
449 * orangefs_bufmap_put()
451 * returns a mapped buffer descriptor to the collection
455 void orangefs_bufmap_put(int buffer_index
)
457 put(&rw_map
, buffer_index
);
461 * orangefs_readdir_index_get()
463 * gets a free descriptor, will sleep until one becomes
464 * available if necessary.
465 * Although the readdir buffers are not mapped into kernel space
466 * we could do that at a later point of time. Regardless, these
467 * indices are used by the client-core.
469 * returns slot on success, -errno on failure
471 int orangefs_readdir_index_get(void)
473 return get(&readdir_map
);
476 void orangefs_readdir_index_put(int buffer_index
)
478 put(&readdir_map
, buffer_index
);
482 * we've been handed an iovec, we need to copy it to
483 * the shared memory descriptor at "buffer_index".
485 int orangefs_bufmap_copy_from_iovec(struct iov_iter
*iter
,
489 struct orangefs_bufmap_desc
*to
;
492 gossip_debug(GOSSIP_BUFMAP_DEBUG
,
493 "%s: buffer_index:%d: size:%zu:\n",
494 __func__
, buffer_index
, size
);
496 to
= &__orangefs_bufmap
->desc_array
[buffer_index
];
497 for (i
= 0; size
; i
++) {
498 struct page
*page
= to
->page_array
[i
];
502 if (copy_page_from_iter(page
, 0, n
, iter
) != n
)
510 * we've been handed an iovec, we need to fill it from
511 * the shared memory descriptor at "buffer_index".
513 int orangefs_bufmap_copy_to_iovec(struct iov_iter
*iter
,
517 struct orangefs_bufmap_desc
*from
;
520 from
= &__orangefs_bufmap
->desc_array
[buffer_index
];
521 gossip_debug(GOSSIP_BUFMAP_DEBUG
,
522 "%s: buffer_index:%d: size:%zu:\n",
523 __func__
, buffer_index
, size
);
526 for (i
= 0; size
; i
++) {
527 struct page
*page
= from
->page_array
[i
];
531 n
= copy_page_to_iter(page
, 0, n
, iter
);
539 void orangefs_bufmap_page_fill(void *page_to
,
543 struct orangefs_bufmap_desc
*from
;
546 from
= &__orangefs_bufmap
->desc_array
[buffer_index
];
547 page_from
= kmap_atomic(from
->page_array
[slot_index
]);
548 memcpy(page_to
, page_from
, PAGE_SIZE
);
549 kunmap_atomic(page_from
);