4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2015, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/ptlrpc/sec_bulk.c
38 * Author: Eric Mei <ericm@clusterfs.com>
41 #define DEBUG_SUBSYSTEM S_SEC
43 #include "../../include/linux/libcfs/libcfs.h"
44 #include <linux/crypto.h>
46 #include "../include/obd.h"
47 #include "../include/obd_cksum.h"
48 #include "../include/obd_class.h"
49 #include "../include/obd_support.h"
50 #include "../include/lustre_net.h"
51 #include "../include/lustre_import.h"
52 #include "../include/lustre_dlm.h"
53 #include "../include/lustre_sec.h"
55 #include "ptlrpc_internal.h"
/****************************************
 * bulk encryption page pools	        *
 ****************************************/

/* number of page pointers that fit in one backing page */
#define POINTERS_PER_PAGE	(PAGE_SIZE / sizeof(void *))
/* each pool is one page holding POINTERS_PER_PAGE struct page pointers */
#define PAGES_PER_POOL		(POINTERS_PER_PAGE)

#define IDLE_IDX_MAX		(100)
#define IDLE_IDX_WEIGHT		(3)

/* seconds without pool access after which the cache counts as fully idle */
#define CACHE_QUIESCENT_PERIOD	(20)
69 static struct ptlrpc_enc_page_pool
{
73 unsigned long epp_max_pages
; /* maximum pages can hold, const */
74 unsigned int epp_max_pools
; /* number of pools, const */
77 * wait queue in case of not enough free pages.
79 wait_queue_head_t epp_waitq
; /* waiting threads */
80 unsigned int epp_waitqlen
; /* wait queue length */
81 unsigned long epp_pages_short
; /* # of pages wanted of in-q users */
82 unsigned int epp_growing
:1; /* during adding pages */
85 * indicating how idle the pools are, from 0 to MAX_IDLE_IDX
86 * this is counted based on each time when getting pages from
87 * the pools, not based on time. which means in case that system
88 * is idled for a while but the idle_idx might still be low if no
89 * activities happened in the pools.
91 unsigned long epp_idle_idx
;
93 /* last shrink time due to mem tight */
94 time64_t epp_last_shrink
;
95 time64_t epp_last_access
;
98 * in-pool pages bookkeeping
100 spinlock_t epp_lock
; /* protect following fields */
101 unsigned long epp_total_pages
; /* total pages in pools */
102 unsigned long epp_free_pages
; /* current pages available */
107 unsigned long epp_st_max_pages
; /* # of pages ever reached */
108 unsigned int epp_st_grows
; /* # of grows */
109 unsigned int epp_st_grow_fails
; /* # of add pages failures */
110 unsigned int epp_st_shrinks
; /* # of shrinks */
111 unsigned long epp_st_access
; /* # of access */
112 unsigned long epp_st_missings
; /* # of cache missing */
113 unsigned long epp_st_lowfree
; /* lowest free pages reached */
114 unsigned int epp_st_max_wqlen
; /* highest waitqueue length */
115 unsigned long epp_st_max_wait
; /* in jiffies */
119 struct page
***epp_pools
;
123 * /sys/kernel/debug/lustre/sptlrpc/encrypt_page_pools
125 int sptlrpc_proc_enc_pool_seq_show(struct seq_file
*m
, void *v
)
127 spin_lock(&page_pools
.epp_lock
);
130 "physical pages: %lu\n"
131 "pages per pool: %lu\n"
136 "idle index: %lu/100\n"
137 "last shrink: %lds\n"
138 "last access: %lds\n"
139 "max pages reached: %lu\n"
141 "grows failure: %u\n"
143 "cache access: %lu\n"
144 "cache missing: %lu\n"
145 "low free mark: %lu\n"
146 "max waitqueue depth: %u\n"
147 "max wait time: %ld/%u\n",
150 page_pools
.epp_max_pages
,
151 page_pools
.epp_max_pools
,
152 page_pools
.epp_total_pages
,
153 page_pools
.epp_free_pages
,
154 page_pools
.epp_idle_idx
,
155 (long)(ktime_get_seconds() - page_pools
.epp_last_shrink
),
156 (long)(ktime_get_seconds() - page_pools
.epp_last_access
),
157 page_pools
.epp_st_max_pages
,
158 page_pools
.epp_st_grows
,
159 page_pools
.epp_st_grow_fails
,
160 page_pools
.epp_st_shrinks
,
161 page_pools
.epp_st_access
,
162 page_pools
.epp_st_missings
,
163 page_pools
.epp_st_lowfree
,
164 page_pools
.epp_st_max_wqlen
,
165 page_pools
.epp_st_max_wait
,
168 spin_unlock(&page_pools
.epp_lock
);
173 static void enc_pools_release_free_pages(long npages
)
176 int p_idx_max1
, p_idx_max2
;
179 LASSERT(npages
<= page_pools
.epp_free_pages
);
180 LASSERT(page_pools
.epp_free_pages
<= page_pools
.epp_total_pages
);
182 /* max pool index before the release */
183 p_idx_max2
= (page_pools
.epp_total_pages
- 1) / PAGES_PER_POOL
;
185 page_pools
.epp_free_pages
-= npages
;
186 page_pools
.epp_total_pages
-= npages
;
188 /* max pool index after the release */
189 p_idx_max1
= page_pools
.epp_total_pages
== 0 ? -1 :
190 ((page_pools
.epp_total_pages
- 1) / PAGES_PER_POOL
);
192 p_idx
= page_pools
.epp_free_pages
/ PAGES_PER_POOL
;
193 g_idx
= page_pools
.epp_free_pages
% PAGES_PER_POOL
;
194 LASSERT(page_pools
.epp_pools
[p_idx
]);
197 LASSERT(page_pools
.epp_pools
[p_idx
]);
198 LASSERT(page_pools
.epp_pools
[p_idx
][g_idx
]);
200 __free_page(page_pools
.epp_pools
[p_idx
][g_idx
]);
201 page_pools
.epp_pools
[p_idx
][g_idx
] = NULL
;
203 if (++g_idx
== PAGES_PER_POOL
) {
209 /* free unused pools */
210 while (p_idx_max1
< p_idx_max2
) {
211 LASSERT(page_pools
.epp_pools
[p_idx_max2
]);
212 kfree(page_pools
.epp_pools
[p_idx_max2
]);
213 page_pools
.epp_pools
[p_idx_max2
] = NULL
;
219 * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool.
221 static unsigned long enc_pools_shrink_count(struct shrinker
*s
,
222 struct shrink_control
*sc
)
225 * if no pool access for a long time, we consider it's fully idle.
226 * a little race here is fine.
228 if (unlikely(ktime_get_seconds() - page_pools
.epp_last_access
>
229 CACHE_QUIESCENT_PERIOD
)) {
230 spin_lock(&page_pools
.epp_lock
);
231 page_pools
.epp_idle_idx
= IDLE_IDX_MAX
;
232 spin_unlock(&page_pools
.epp_lock
);
235 LASSERT(page_pools
.epp_idle_idx
<= IDLE_IDX_MAX
);
236 return max((int)page_pools
.epp_free_pages
- PTLRPC_MAX_BRW_PAGES
, 0) *
237 (IDLE_IDX_MAX
- page_pools
.epp_idle_idx
) / IDLE_IDX_MAX
;
241 * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool.
243 static unsigned long enc_pools_shrink_scan(struct shrinker
*s
,
244 struct shrink_control
*sc
)
246 spin_lock(&page_pools
.epp_lock
);
247 sc
->nr_to_scan
= min_t(unsigned long, sc
->nr_to_scan
,
248 page_pools
.epp_free_pages
- PTLRPC_MAX_BRW_PAGES
);
249 if (sc
->nr_to_scan
> 0) {
250 enc_pools_release_free_pages(sc
->nr_to_scan
);
251 CDEBUG(D_SEC
, "released %ld pages, %ld left\n",
252 (long)sc
->nr_to_scan
, page_pools
.epp_free_pages
);
254 page_pools
.epp_st_shrinks
++;
255 page_pools
.epp_last_shrink
= ktime_get_seconds();
257 spin_unlock(&page_pools
.epp_lock
);
260 * if no pool access for a long time, we consider it's fully idle.
261 * a little race here is fine.
263 if (unlikely(ktime_get_seconds() - page_pools
.epp_last_access
>
264 CACHE_QUIESCENT_PERIOD
)) {
265 spin_lock(&page_pools
.epp_lock
);
266 page_pools
.epp_idle_idx
= IDLE_IDX_MAX
;
267 spin_unlock(&page_pools
.epp_lock
);
270 LASSERT(page_pools
.epp_idle_idx
<= IDLE_IDX_MAX
);
271 return sc
->nr_to_scan
;
275 int npages_to_npools(unsigned long npages
)
277 return (int) ((npages
+ PAGES_PER_POOL
- 1) / PAGES_PER_POOL
);
281 * return how many pages cleaned up.
283 static unsigned long enc_pools_cleanup(struct page
***pools
, int npools
)
285 unsigned long cleaned
= 0;
288 for (i
= 0; i
< npools
; i
++) {
290 for (j
= 0; j
< PAGES_PER_POOL
; j
++) {
292 __free_page(pools
[i
][j
]);
304 static inline void enc_pools_wakeup(void)
306 assert_spin_locked(&page_pools
.epp_lock
);
308 if (unlikely(page_pools
.epp_waitqlen
)) {
309 LASSERT(waitqueue_active(&page_pools
.epp_waitq
));
310 wake_up_all(&page_pools
.epp_waitq
);
314 void sptlrpc_enc_pool_put_pages(struct ptlrpc_bulk_desc
*desc
)
319 if (!desc
->bd_enc_iov
)
322 LASSERT(desc
->bd_iov_count
> 0);
324 spin_lock(&page_pools
.epp_lock
);
326 p_idx
= page_pools
.epp_free_pages
/ PAGES_PER_POOL
;
327 g_idx
= page_pools
.epp_free_pages
% PAGES_PER_POOL
;
329 LASSERT(page_pools
.epp_free_pages
+ desc
->bd_iov_count
<=
330 page_pools
.epp_total_pages
);
331 LASSERT(page_pools
.epp_pools
[p_idx
]);
333 for (i
= 0; i
< desc
->bd_iov_count
; i
++) {
334 LASSERT(desc
->bd_enc_iov
[i
].kiov_page
);
335 LASSERT(g_idx
!= 0 || page_pools
.epp_pools
[p_idx
]);
336 LASSERT(!page_pools
.epp_pools
[p_idx
][g_idx
]);
338 page_pools
.epp_pools
[p_idx
][g_idx
] =
339 desc
->bd_enc_iov
[i
].kiov_page
;
341 if (++g_idx
== PAGES_PER_POOL
) {
347 page_pools
.epp_free_pages
+= desc
->bd_iov_count
;
351 spin_unlock(&page_pools
.epp_lock
);
353 kfree(desc
->bd_enc_iov
);
354 desc
->bd_enc_iov
= NULL
;
356 EXPORT_SYMBOL(sptlrpc_enc_pool_put_pages
);
358 static inline void enc_pools_alloc(void)
360 LASSERT(page_pools
.epp_max_pools
);
361 page_pools
.epp_pools
=
362 libcfs_kvzalloc(page_pools
.epp_max_pools
*
363 sizeof(*page_pools
.epp_pools
),
367 static inline void enc_pools_free(void)
369 LASSERT(page_pools
.epp_max_pools
);
370 LASSERT(page_pools
.epp_pools
);
372 kvfree(page_pools
.epp_pools
);
375 static struct shrinker pools_shrinker
= {
376 .count_objects
= enc_pools_shrink_count
,
377 .scan_objects
= enc_pools_shrink_scan
,
378 .seeks
= DEFAULT_SEEKS
,
381 int sptlrpc_enc_pool_init(void)
384 * maximum capacity is 1/8 of total physical memory.
385 * is the 1/8 a good number?
387 page_pools
.epp_max_pages
= totalram_pages
/ 8;
388 page_pools
.epp_max_pools
= npages_to_npools(page_pools
.epp_max_pages
);
390 init_waitqueue_head(&page_pools
.epp_waitq
);
391 page_pools
.epp_waitqlen
= 0;
392 page_pools
.epp_pages_short
= 0;
394 page_pools
.epp_growing
= 0;
396 page_pools
.epp_idle_idx
= 0;
397 page_pools
.epp_last_shrink
= ktime_get_seconds();
398 page_pools
.epp_last_access
= ktime_get_seconds();
400 spin_lock_init(&page_pools
.epp_lock
);
401 page_pools
.epp_total_pages
= 0;
402 page_pools
.epp_free_pages
= 0;
404 page_pools
.epp_st_max_pages
= 0;
405 page_pools
.epp_st_grows
= 0;
406 page_pools
.epp_st_grow_fails
= 0;
407 page_pools
.epp_st_shrinks
= 0;
408 page_pools
.epp_st_access
= 0;
409 page_pools
.epp_st_missings
= 0;
410 page_pools
.epp_st_lowfree
= 0;
411 page_pools
.epp_st_max_wqlen
= 0;
412 page_pools
.epp_st_max_wait
= 0;
415 if (!page_pools
.epp_pools
)
418 register_shrinker(&pools_shrinker
);
423 void sptlrpc_enc_pool_fini(void)
425 unsigned long cleaned
, npools
;
427 LASSERT(page_pools
.epp_pools
);
428 LASSERT(page_pools
.epp_total_pages
== page_pools
.epp_free_pages
);
430 unregister_shrinker(&pools_shrinker
);
432 npools
= npages_to_npools(page_pools
.epp_total_pages
);
433 cleaned
= enc_pools_cleanup(page_pools
.epp_pools
, npools
);
434 LASSERT(cleaned
== page_pools
.epp_total_pages
);
438 if (page_pools
.epp_st_access
> 0) {
440 "max pages %lu, grows %u, grow fails %u, shrinks %u, access %lu, missing %lu, max qlen %u, max wait %ld/%d\n",
441 page_pools
.epp_st_max_pages
, page_pools
.epp_st_grows
,
442 page_pools
.epp_st_grow_fails
,
443 page_pools
.epp_st_shrinks
, page_pools
.epp_st_access
,
444 page_pools
.epp_st_missings
, page_pools
.epp_st_max_wqlen
,
445 page_pools
.epp_st_max_wait
, HZ
);
449 static int cfs_hash_alg_id
[] = {
450 [BULK_HASH_ALG_NULL
] = CFS_HASH_ALG_NULL
,
451 [BULK_HASH_ALG_ADLER32
] = CFS_HASH_ALG_ADLER32
,
452 [BULK_HASH_ALG_CRC32
] = CFS_HASH_ALG_CRC32
,
453 [BULK_HASH_ALG_MD5
] = CFS_HASH_ALG_MD5
,
454 [BULK_HASH_ALG_SHA1
] = CFS_HASH_ALG_SHA1
,
455 [BULK_HASH_ALG_SHA256
] = CFS_HASH_ALG_SHA256
,
456 [BULK_HASH_ALG_SHA384
] = CFS_HASH_ALG_SHA384
,
457 [BULK_HASH_ALG_SHA512
] = CFS_HASH_ALG_SHA512
,
460 const char *sptlrpc_get_hash_name(__u8 hash_alg
)
462 return cfs_crypto_hash_name(cfs_hash_alg_id
[hash_alg
]);
464 EXPORT_SYMBOL(sptlrpc_get_hash_name
);
466 __u8
sptlrpc_get_hash_alg(const char *algname
)
468 return cfs_crypto_hash_alg(algname
);
470 EXPORT_SYMBOL(sptlrpc_get_hash_alg
);
472 int bulk_sec_desc_unpack(struct lustre_msg
*msg
, int offset
, int swabbed
)
474 struct ptlrpc_bulk_sec_desc
*bsd
;
475 int size
= msg
->lm_buflens
[offset
];
477 bsd
= lustre_msg_buf(msg
, offset
, sizeof(*bsd
));
479 CERROR("Invalid bulk sec desc: size %d\n", size
);
484 __swab32s(&bsd
->bsd_nob
);
486 if (unlikely(bsd
->bsd_version
!= 0)) {
487 CERROR("Unexpected version %u\n", bsd
->bsd_version
);
491 if (unlikely(bsd
->bsd_type
>= SPTLRPC_BULK_MAX
)) {
492 CERROR("Invalid type %u\n", bsd
->bsd_type
);
496 /* FIXME more sanity check here */
498 if (unlikely(bsd
->bsd_svc
!= SPTLRPC_BULK_SVC_NULL
&&
499 bsd
->bsd_svc
!= SPTLRPC_BULK_SVC_INTG
&&
500 bsd
->bsd_svc
!= SPTLRPC_BULK_SVC_PRIV
)) {
501 CERROR("Invalid svc %u\n", bsd
->bsd_svc
);
507 EXPORT_SYMBOL(bulk_sec_desc_unpack
);
509 int sptlrpc_get_bulk_checksum(struct ptlrpc_bulk_desc
*desc
, __u8 alg
,
510 void *buf
, int buflen
)
512 struct cfs_crypto_hash_desc
*hdesc
;
515 unsigned int bufsize
;
518 LASSERT(alg
> BULK_HASH_ALG_NULL
&& alg
< BULK_HASH_ALG_MAX
);
519 LASSERT(buflen
>= 4);
521 hdesc
= cfs_crypto_hash_init(cfs_hash_alg_id
[alg
], NULL
, 0);
523 CERROR("Unable to initialize checksum hash %s\n",
524 cfs_crypto_hash_name(cfs_hash_alg_id
[alg
]));
525 return PTR_ERR(hdesc
);
528 hashsize
= cfs_crypto_hash_digestsize(cfs_hash_alg_id
[alg
]);
530 for (i
= 0; i
< desc
->bd_iov_count
; i
++) {
531 cfs_crypto_hash_update_page(hdesc
, desc
->bd_iov
[i
].kiov_page
,
532 desc
->bd_iov
[i
].kiov_offset
& ~CFS_PAGE_MASK
,
533 desc
->bd_iov
[i
].kiov_len
);
535 if (hashsize
> buflen
) {
536 bufsize
= sizeof(hashbuf
);
537 err
= cfs_crypto_hash_final(hdesc
, (unsigned char *)hashbuf
,
539 memcpy(buf
, hashbuf
, buflen
);
542 err
= cfs_crypto_hash_final(hdesc
, buf
, &bufsize
);
546 cfs_crypto_hash_final(hdesc
, NULL
, NULL
);
549 EXPORT_SYMBOL(sptlrpc_get_bulk_checksum
);