]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blame - kernel/bpf/bpf_lru_list.c
bpf: LRU List
[mirror_ubuntu-zesty-kernel.git] / kernel / bpf / bpf_lru_list.c
CommitLineData
3a08c2fd
MKL
1/* Copyright (c) 2016 Facebook
2 *
3 * This program is free software; you can redistribute it and/or
4 * modify it under the terms of version 2 of the GNU General Public
5 * License as published by the Free Software Foundation.
6 */
7#include <linux/cpumask.h>
8#include <linux/spinlock.h>
9#include <linux/percpu.h>
10
11#include "bpf_lru_list.h"
12
13#define LOCAL_FREE_TARGET (128)
14#define LOCAL_NR_SCANS LOCAL_FREE_TARGET
15
16/* Helpers to get the local list index */
17#define LOCAL_LIST_IDX(t) ((t) - BPF_LOCAL_LIST_T_OFFSET)
18#define LOCAL_FREE_LIST_IDX LOCAL_LIST_IDX(BPF_LRU_LOCAL_LIST_T_FREE)
19#define LOCAL_PENDING_LIST_IDX LOCAL_LIST_IDX(BPF_LRU_LOCAL_LIST_T_PENDING)
20#define IS_LOCAL_LIST_TYPE(t) ((t) >= BPF_LOCAL_LIST_T_OFFSET)
21
22static int get_next_cpu(int cpu)
23{
24 cpu = cpumask_next(cpu, cpu_possible_mask);
25 if (cpu >= nr_cpu_ids)
26 cpu = cpumask_first(cpu_possible_mask);
27 return cpu;
28}
29
30/* Local list helpers */
31static struct list_head *local_free_list(struct bpf_lru_locallist *loc_l)
32{
33 return &loc_l->lists[LOCAL_FREE_LIST_IDX];
34}
35
36static struct list_head *local_pending_list(struct bpf_lru_locallist *loc_l)
37{
38 return &loc_l->lists[LOCAL_PENDING_LIST_IDX];
39}
40
41/* bpf_lru_node helpers */
42static bool bpf_lru_node_is_ref(const struct bpf_lru_node *node)
43{
44 return node->ref;
45}
46
47static void bpf_lru_list_count_inc(struct bpf_lru_list *l,
48 enum bpf_lru_list_type type)
49{
50 if (type < NR_BPF_LRU_LIST_COUNT)
51 l->counts[type]++;
52}
53
54static void bpf_lru_list_count_dec(struct bpf_lru_list *l,
55 enum bpf_lru_list_type type)
56{
57 if (type < NR_BPF_LRU_LIST_COUNT)
58 l->counts[type]--;
59}
60
/* Unlink @node from the global LRU list it is on, retag it as
 * @tgt_free_type and put it on @free_list (a local or global free
 * list), keeping the per-type counters in sync.
 *
 * Only valid for nodes on a global list; local-list nodes trip the
 * WARN and are left untouched.  Caller must hold l->lock.
 */
static void __bpf_lru_node_move_to_free(struct bpf_lru_list *l,
					struct bpf_lru_node *node,
					struct list_head *free_list,
					enum bpf_lru_list_type tgt_free_type)
{
	if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node->type)))
		return;

	/* If the removing node is the next_inactive_rotation candidate,
	 * move the next_inactive_rotation pointer also.
	 */
	if (&node->list == l->next_inactive_rotation)
		l->next_inactive_rotation = l->next_inactive_rotation->prev;

	bpf_lru_list_count_dec(l, node->type);

	node->type = tgt_free_type;
	list_move(&node->list, free_list);
}
80
/* Move a node from a per-cpu local list into the global LRU list @l,
 * placing it on the @tgt_type (active or inactive) list and bumping
 * that list's counter.  The ref bit is cleared so the node starts a
 * fresh reference cycle on the global list.
 *
 * Caller must hold l->lock.
 */
static void __bpf_lru_node_move_in(struct bpf_lru_list *l,
				   struct bpf_lru_node *node,
				   enum bpf_lru_list_type tgt_type)
{
	/* Only local -> global moves are valid here. */
	if (WARN_ON_ONCE(!IS_LOCAL_LIST_TYPE(node->type)) ||
	    WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(tgt_type)))
		return;

	bpf_lru_list_count_inc(l, tgt_type);
	node->type = tgt_type;
	node->ref = 0;
	list_move(&node->list, &l->lists[tgt_type]);
}
95
/* Move nodes between or within active and inactive list (like
 * active to inactive, inactive to active or tail of active back to
 * the head of active).  Also used to park a node on the global free
 * list (see bpf_lru_list_push_free()).
 *
 * The ref bit is cleared on every move.  Caller must hold l->lock.
 */
static void __bpf_lru_node_move(struct bpf_lru_list *l,
				struct bpf_lru_node *node,
				enum bpf_lru_list_type tgt_type)
{
	/* Local-list nodes are moved by __bpf_lru_node_move_in(),
	 * never by this function.
	 */
	if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node->type)) ||
	    WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(tgt_type)))
		return;

	/* Keep the per-type counters in sync when switching lists. */
	if (node->type != tgt_type) {
		bpf_lru_list_count_dec(l, node->type);
		bpf_lru_list_count_inc(l, tgt_type);
		node->type = tgt_type;
	}
	node->ref = 0;

	/* If the moving node is the next_inactive_rotation candidate,
	 * move the next_inactive_rotation pointer also.
	 */
	if (&node->list == l->next_inactive_rotation)
		l->next_inactive_rotation = l->next_inactive_rotation->prev;

	list_move(&node->list, &l->lists[tgt_type]);
}
123
124static bool bpf_lru_list_inactive_low(const struct bpf_lru_list *l)
125{
126 return l->counts[BPF_LRU_LIST_T_INACTIVE] <
127 l->counts[BPF_LRU_LIST_T_ACTIVE];
128}
129
/* Rotate the active list:
 * 1. Start from tail
 * 2. If the node has the ref bit set, it will be rotated
 *    back to the head of active list with the ref bit cleared.
 *    Give this node one more chance to survive in the active list.
 * 3. If the ref bit is not set, move it to the head of the
 *    inactive list.
 * 4. It will at most scan nr_scans nodes
 *
 * Caller must hold l->lock.
 */
static void __bpf_lru_list_rotate_active(struct bpf_lru *lru,
					 struct bpf_lru_list *l)
{
	struct list_head *active = &l->lists[BPF_LRU_LIST_T_ACTIVE];
	struct bpf_lru_node *node, *tmp_node, *first_node;
	unsigned int i = 0;

	/* Snapshot the current head: ref'd nodes get rotated back to
	 * the head, so stopping once the original first node has been
	 * handled prevents re-scanning nodes already rotated in this
	 * pass.  (On an empty list first_node aliases the head and is
	 * never dereferenced because the loop body does not run.)
	 */
	first_node = list_first_entry(active, struct bpf_lru_node, list);
	list_for_each_entry_safe_reverse(node, tmp_node, active, list) {
		if (bpf_lru_node_is_ref(node))
			__bpf_lru_node_move(l, node, BPF_LRU_LIST_T_ACTIVE);
		else
			__bpf_lru_node_move(l, node, BPF_LRU_LIST_T_INACTIVE);

		if (++i == lru->nr_scans || node == first_node)
			break;
	}
}
157
/* Rotate the inactive list. It starts from the next_inactive_rotation
 * 1. If the node has ref bit set, it will be moved to the head
 *    of active list with the ref bit cleared.
 * 2. If the node does not have ref bit set, it will leave it
 *    at its current location (i.e. do nothing) so that it can
 *    be considered during the next inactive_shrink.
 * 3. It will at most scan nr_scans nodes
 *
 * The scan resumes where the previous rotation stopped
 * (l->next_inactive_rotation) and walks via ->prev, stepping over the
 * list head and wrapping around; "last" marks the point at which a
 * full lap completes.
 *
 * Caller must hold l->lock.
 */
static void __bpf_lru_list_rotate_inactive(struct bpf_lru *lru,
					   struct bpf_lru_list *l)
{
	struct list_head *inactive = &l->lists[BPF_LRU_LIST_T_INACTIVE];
	struct list_head *cur, *next, *last;
	struct bpf_lru_node *node;
	unsigned int i = 0;

	if (list_empty(inactive))
		return;

	/* One step past the start in the wrap direction; skip the list
	 * head itself, which carries no node.
	 */
	last = l->next_inactive_rotation->next;
	if (last == inactive)
		last = last->next;

	cur = l->next_inactive_rotation;
	while (i < lru->nr_scans) {
		if (cur == inactive) {
			/* The head is not a node: step over it without
			 * consuming scan budget.
			 */
			cur = cur->prev;
			continue;
		}

		node = list_entry(cur, struct bpf_lru_node, list);
		/* Capture the successor first: a ref'd node is moved
		 * off this list just below.
		 */
		next = cur->prev;
		if (bpf_lru_node_is_ref(node))
			__bpf_lru_node_move(l, node, BPF_LRU_LIST_T_ACTIVE);
		if (cur == last)
			break;
		cur = next;
		i++;
	}

	/* "next" is always written before we get here: the list is
	 * non-empty, so at most one head-skip precedes the first real
	 * node, and both loop exits occur only after a node has been
	 * examined (assumes lru->nr_scans >= 1, which bpf_lru_init()
	 * guarantees via LOCAL_NR_SCANS).
	 */
	l->next_inactive_rotation = next;
}
200
201/* Shrink the inactive list. It starts from the tail of the
202 * inactive list and only move the nodes without the ref bit
203 * set to the designated free list.
204 */
205static unsigned int
206__bpf_lru_list_shrink_inactive(struct bpf_lru *lru,
207 struct bpf_lru_list *l,
208 unsigned int tgt_nshrink,
209 struct list_head *free_list,
210 enum bpf_lru_list_type tgt_free_type)
211{
212 struct list_head *inactive = &l->lists[BPF_LRU_LIST_T_INACTIVE];
213 struct bpf_lru_node *node, *tmp_node, *first_node;
214 unsigned int nshrinked = 0;
215 unsigned int i = 0;
216
217 first_node = list_first_entry(inactive, struct bpf_lru_node, list);
218 list_for_each_entry_safe_reverse(node, tmp_node, inactive, list) {
219 if (bpf_lru_node_is_ref(node)) {
220 __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_ACTIVE);
221 } else if (lru->del_from_htab(lru->del_arg, node)) {
222 __bpf_lru_node_move_to_free(l, node, free_list,
223 tgt_free_type);
224 if (++nshrinked == tgt_nshrink)
225 break;
226 }
227
228 if (++i == lru->nr_scans)
229 break;
230 }
231
232 return nshrinked;
233}
234
235/* 1. Rotate the active list (if needed)
236 * 2. Always rotate the inactive list
237 */
238static void __bpf_lru_list_rotate(struct bpf_lru *lru, struct bpf_lru_list *l)
239{
240 if (bpf_lru_list_inactive_low(l))
241 __bpf_lru_list_rotate_active(lru, l);
242
243 __bpf_lru_list_rotate_inactive(lru, l);
244}
245
/* Calls __bpf_lru_list_shrink_inactive() to shrink some
 * ref-bit-cleared nodes and move them to the designated
 * free list.
 *
 * If it cannot get a free node after calling
 * __bpf_lru_list_shrink_inactive(). It will just remove
 * one node from either inactive or active list without
 * honoring the ref-bit. It prefers inactive list to active
 * list in this situation.
 *
 * Caller must hold l->lock.  Returns the number of nodes moved to
 * @free_list; the forced path frees at most one.
 */
static unsigned int __bpf_lru_list_shrink(struct bpf_lru *lru,
					  struct bpf_lru_list *l,
					  unsigned int tgt_nshrink,
					  struct list_head *free_list,
					  enum bpf_lru_list_type tgt_free_type)
{
	struct bpf_lru_node *node, *tmp_node;
	struct list_head *force_shrink_list;
	unsigned int nshrinked;

	nshrinked = __bpf_lru_list_shrink_inactive(lru, l, tgt_nshrink,
						   free_list, tgt_free_type);
	if (nshrinked)
		return nshrinked;

	/* Do a force shrink by ignoring the reference bit */
	if (!list_empty(&l->lists[BPF_LRU_LIST_T_INACTIVE]))
		force_shrink_list = &l->lists[BPF_LRU_LIST_T_INACTIVE];
	else
		force_shrink_list = &l->lists[BPF_LRU_LIST_T_ACTIVE];

	/* Even when forcing, the htab must agree to give the node up. */
	list_for_each_entry_safe_reverse(node, tmp_node, force_shrink_list,
					 list) {
		if (lru->del_from_htab(lru->del_arg, node)) {
			__bpf_lru_node_move_to_free(l, node, free_list,
						    tgt_free_type);
			return 1;
		}
	}

	return 0;
}
289
290/* Flush the nodes from the local pending list to the LRU list */
291static void __local_list_flush(struct bpf_lru_list *l,
292 struct bpf_lru_locallist *loc_l)
293{
294 struct bpf_lru_node *node, *tmp_node;
295
296 list_for_each_entry_safe_reverse(node, tmp_node,
297 local_pending_list(loc_l), list) {
298 if (bpf_lru_node_is_ref(node))
299 __bpf_lru_node_move_in(l, node, BPF_LRU_LIST_T_ACTIVE);
300 else
301 __bpf_lru_node_move_in(l, node,
302 BPF_LRU_LIST_T_INACTIVE);
303 }
304}
305
/* Return @node to the global free list, taking l->lock.
 * Nodes still tagged as being on a local list are warned about and
 * ignored; bpf_lru_push_free() handles those before calling here.
 */
static void bpf_lru_list_push_free(struct bpf_lru_list *l,
				   struct bpf_lru_node *node)
{
	unsigned long flags;

	if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node->type)))
		return;

	raw_spin_lock_irqsave(&l->lock, flags);
	__bpf_lru_node_move(l, node, BPF_LRU_LIST_T_FREE);
	raw_spin_unlock_irqrestore(&l->lock, flags);
}
318
/* Refill @loc_l's free list from the global LRU list:
 * 1. Flush this cpu's pending nodes back into the global lists.
 * 2. Rotate the active/inactive lists.
 * 3. Grab up to LOCAL_FREE_TARGET nodes from the global free list.
 * 4. If still short, shrink the inactive (or active) list to make up
 *    the difference.
 *
 * Takes l->lock with plain raw_spin_lock(): the caller
 * (bpf_lru_pop_free) already holds loc_l->lock with interrupts
 * disabled.
 */
static void bpf_lru_list_pop_free_to_local(struct bpf_lru *lru,
					   struct bpf_lru_locallist *loc_l)
{
	struct bpf_lru_list *l = &lru->common_lru.lru_list;
	struct bpf_lru_node *node, *tmp_node;
	unsigned int nfree = 0;

	raw_spin_lock(&l->lock);

	__local_list_flush(l, loc_l);

	__bpf_lru_list_rotate(lru, l);

	list_for_each_entry_safe(node, tmp_node, &l->lists[BPF_LRU_LIST_T_FREE],
				 list) {
		__bpf_lru_node_move_to_free(l, node, local_free_list(loc_l),
					    BPF_LRU_LOCAL_LIST_T_FREE);
		if (++nfree == LOCAL_FREE_TARGET)
			break;
	}

	if (nfree < LOCAL_FREE_TARGET)
		__bpf_lru_list_shrink(lru, l, LOCAL_FREE_TARGET - nfree,
				      local_free_list(loc_l),
				      BPF_LRU_LOCAL_LIST_T_FREE);

	raw_spin_unlock(&l->lock);
}
347
348static void __local_list_add_pending(struct bpf_lru *lru,
349 struct bpf_lru_locallist *loc_l,
350 int cpu,
351 struct bpf_lru_node *node,
352 u32 hash)
353{
354 *(u32 *)((void *)node + lru->hash_offset) = hash;
355 node->cpu = cpu;
356 node->type = BPF_LRU_LOCAL_LIST_T_PENDING;
357 node->ref = 0;
358 list_add(&node->list, local_pending_list(loc_l));
359}
360
361struct bpf_lru_node *__local_list_pop_free(struct bpf_lru_locallist *loc_l)
362{
363 struct bpf_lru_node *node;
364
365 node = list_first_entry_or_null(local_free_list(loc_l),
366 struct bpf_lru_node,
367 list);
368 if (node)
369 list_del(&node->list);
370
371 return node;
372}
373
374struct bpf_lru_node *__local_list_pop_pending(struct bpf_lru *lru,
375 struct bpf_lru_locallist *loc_l)
376{
377 struct bpf_lru_node *node;
378 bool force = false;
379
380ignore_ref:
381 /* Get from the tail (i.e. older element) of the pending list. */
382 list_for_each_entry_reverse(node, local_pending_list(loc_l),
383 list) {
384 if ((!bpf_lru_node_is_ref(node) || force) &&
385 lru->del_from_htab(lru->del_arg, node)) {
386 list_del(&node->list);
387 return node;
388 }
389 }
390
391 if (!force) {
392 force = true;
393 goto ignore_ref;
394 }
395
396 return NULL;
397}
398
/* Get a free node and tag it with @hash for the caller's new element.
 *
 * Fast path: pop from this cpu's local free list, refilling it from
 * the global LRU list (flush + rotate + shrink) when it runs dry.
 * Slow path: steal a free or pending node from other cpus' local
 * lists, round-robin, starting from loc_l->next_steal.
 *
 * The node handed out is placed on this cpu's local pending list; it
 * rejoins the global LRU lists at the next __local_list_flush().
 * Returns NULL only if no node could be obtained from any cpu.
 */
struct bpf_lru_node *bpf_lru_pop_free(struct bpf_lru *lru, u32 hash)
{
	struct bpf_lru_locallist *loc_l, *steal_loc_l;
	struct bpf_common_lru *clru = &lru->common_lru;
	struct bpf_lru_node *node;
	int steal, first_steal;
	unsigned long flags;
	int cpu = raw_smp_processor_id();

	loc_l = per_cpu_ptr(clru->local_list, cpu);

	raw_spin_lock_irqsave(&loc_l->lock, flags);

	node = __local_list_pop_free(loc_l);
	if (!node) {
		/* Local free list is dry: refill from the global LRU
		 * (this relies on the irqsave lock held just above).
		 */
		bpf_lru_list_pop_free_to_local(lru, loc_l);
		node = __local_list_pop_free(loc_l);
	}

	if (node)
		__local_list_add_pending(lru, loc_l, cpu, node, hash);

	raw_spin_unlock_irqrestore(&loc_l->lock, flags);

	if (node)
		return node;

	/* No free nodes found from the local free list and
	 * the global LRU list.
	 *
	 * Steal from the local free/pending list of the
	 * current CPU and remote CPU in RR. It starts
	 * with the loc_l->next_steal CPU.
	 */

	first_steal = loc_l->next_steal;
	steal = first_steal;
	do {
		steal_loc_l = per_cpu_ptr(clru->local_list, steal);

		raw_spin_lock_irqsave(&steal_loc_l->lock, flags);

		node = __local_list_pop_free(steal_loc_l);
		if (!node)
			node = __local_list_pop_pending(lru, steal_loc_l);

		raw_spin_unlock_irqrestore(&steal_loc_l->lock, flags);

		steal = get_next_cpu(steal);
	} while (!node && steal != first_steal);

	/* Resume stealing from the next cpu on the following call. */
	loc_l->next_steal = steal;

	if (node) {
		/* Re-take our own lock to claim the stolen node. */
		raw_spin_lock_irqsave(&loc_l->lock, flags);
		__local_list_add_pending(lru, loc_l, cpu, node, hash);
		raw_spin_unlock_irqrestore(&loc_l->lock, flags);
	}

	return node;
}
460
/* Return an element's node to the LRU after the htab has deleted the
 * element.  Pending nodes go back to their owning cpu's local free
 * list; everything else goes to the global free list.  A node that is
 * already on a free list is warned about and ignored.
 */
void bpf_lru_push_free(struct bpf_lru *lru, struct bpf_lru_node *node)
{
	unsigned long flags;

	if (WARN_ON_ONCE(node->type == BPF_LRU_LIST_T_FREE) ||
	    WARN_ON_ONCE(node->type == BPF_LRU_LOCAL_LIST_T_FREE))
		return;

	if (node->type == BPF_LRU_LOCAL_LIST_T_PENDING) {
		struct bpf_lru_locallist *loc_l;

		loc_l = per_cpu_ptr(lru->common_lru.local_list, node->cpu);

		raw_spin_lock_irqsave(&loc_l->lock, flags);

		/* Re-check under the owner cpu's lock.
		 * NOTE(review): presumably this guards against a
		 * concurrent path changing node->type between the
		 * unlocked check above and taking the lock — confirm
		 * against the steal path in bpf_lru_pop_free().
		 */
		if (unlikely(node->type != BPF_LRU_LOCAL_LIST_T_PENDING)) {
			raw_spin_unlock_irqrestore(&loc_l->lock, flags);
			goto check_lru_list;
		}

		node->type = BPF_LRU_LOCAL_LIST_T_FREE;
		node->ref = 0;
		list_move(&node->list, local_free_list(loc_l));

		raw_spin_unlock_irqrestore(&loc_l->lock, flags);
		return;
	}

check_lru_list:
	bpf_lru_list_push_free(&lru->common_lru.lru_list, node);
}
492
493void bpf_lru_populate(struct bpf_lru *lru, void *buf, u32 node_offset,
494 u32 elem_size, u32 nr_elems)
495{
496 struct bpf_lru_list *l = &lru->common_lru.lru_list;
497 u32 i;
498
499 for (i = 0; i < nr_elems; i++) {
500 struct bpf_lru_node *node;
501
502 node = (struct bpf_lru_node *)(buf + node_offset);
503 node->type = BPF_LRU_LIST_T_FREE;
504 node->ref = 0;
505 list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]);
506 buf += elem_size;
507 }
508}
509
510static void bpf_lru_locallist_init(struct bpf_lru_locallist *loc_l, int cpu)
511{
512 int i;
513
514 for (i = 0; i < NR_BPF_LRU_LOCAL_LIST_T; i++)
515 INIT_LIST_HEAD(&loc_l->lists[i]);
516
517 loc_l->next_steal = cpu;
518
519 raw_spin_lock_init(&loc_l->lock);
520}
521
522static void bpf_lru_list_init(struct bpf_lru_list *l)
523{
524 int i;
525
526 for (i = 0; i < NR_BPF_LRU_LIST_T; i++)
527 INIT_LIST_HEAD(&l->lists[i]);
528
529 for (i = 0; i < NR_BPF_LRU_LIST_COUNT; i++)
530 l->counts[i] = 0;
531
532 l->next_inactive_rotation = &l->lists[BPF_LRU_LIST_T_INACTIVE];
533
534 raw_spin_lock_init(&l->lock);
535}
536
537int bpf_lru_init(struct bpf_lru *lru, u32 hash_offset,
538 del_from_htab_func del_from_htab, void *del_arg)
539{
540 int cpu;
541 struct bpf_common_lru *clru = &lru->common_lru;
542
543 clru->local_list = alloc_percpu(struct bpf_lru_locallist);
544 if (!clru->local_list)
545 return -ENOMEM;
546
547 for_each_possible_cpu(cpu) {
548 struct bpf_lru_locallist *loc_l;
549
550 loc_l = per_cpu_ptr(clru->local_list, cpu);
551 bpf_lru_locallist_init(loc_l, cpu);
552 }
553
554 bpf_lru_list_init(&clru->lru_list);
555 lru->nr_scans = LOCAL_NR_SCANS;
556
557 lru->del_from_htab = del_from_htab;
558 lru->del_arg = del_arg;
559 lru->hash_offset = hash_offset;
560
561 return 0;
562}
563
/* Release the per-cpu local lists allocated by bpf_lru_init().  The
 * element storage handed to bpf_lru_populate() is owned by the caller
 * and is not freed here.
 */
void bpf_lru_destroy(struct bpf_lru *lru)
{
	free_percpu(lru->common_lru.local_list);
}