]>
Commit | Line | Data |
---|---|---|
5db58faf MKL |
1 | /* |
2 | * Copyright (c) 2016 Facebook | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or | |
5 | * modify it under the terms of version 2 of the GNU General Public | |
6 | * License as published by the Free Software Foundation. | |
7 | */ | |
8 | #define _GNU_SOURCE | |
9 | #include <linux/types.h> | |
10 | #include <stdio.h> | |
11 | #include <unistd.h> | |
12 | #include <linux/bpf.h> | |
13 | #include <errno.h> | |
14 | #include <string.h> | |
15 | #include <assert.h> | |
16 | #include <sched.h> | |
17 | #include <sys/wait.h> | |
18 | #include <sys/stat.h> | |
e00c7b21 | 19 | #include <sys/resource.h> |
5db58faf MKL |
20 | #include <fcntl.h> |
21 | #include <stdlib.h> | |
22 | #include <time.h> | |
e00c7b21 | 23 | |
5db58faf | 24 | #include "libbpf.h" |
e00c7b21 | 25 | #include "bpf_util.h" |
5db58faf MKL |
26 | |
/* Smaller of a and b.  Each argument may be evaluated twice. */
#define min(a, b) \
	((a) < (b) ? (a) : (b))

/* Byte offset of MEMBER within TYPE. */
#define offsetof(TYPE, MEMBER) \
	((size_t)&((TYPE *)0)->MEMBER)

/*
 * Given ptr, the address of the field `member` embedded in an object of
 * type `type`, yield the address of that enclosing object.  The temporary
 * makes the compiler check that ptr has the member's type.
 */
#define container_of(ptr, type, member) ({				\
	const typeof(((type *)0)->member) *__member_ptr = (ptr);	\
	(type *)((char *)__member_ptr - offsetof(type, member)); })
32 | ||
/* CPU count; assigned in main() from bpf_num_possible_cpus(). */
static int nr_cpus;

/* Key sequence loaded from the distribution file by read_keys(). */
static unsigned long long *dist_keys;

/* Number of entries stored in dist_keys. */
static unsigned int dist_key_counts;
36 | ||
/*
 * Link element of a circular doubly-linked list.  It is embedded in the
 * objects being linked and also serves as the list head.
 */
struct list_head {
	struct list_head *next;	/* following element */
	struct list_head *prev;	/* preceding element */
};
40 | ||
41 | static inline void INIT_LIST_HEAD(struct list_head *list) | |
42 | { | |
43 | list->next = list; | |
44 | list->prev = list; | |
45 | } | |
46 | ||
47 | static inline int list_empty(const struct list_head *head) | |
48 | { | |
49 | return head->next == head; | |
50 | } | |
51 | ||
52 | static inline void __list_add(struct list_head *new, | |
53 | struct list_head *prev, | |
54 | struct list_head *next) | |
55 | { | |
56 | next->prev = new; | |
57 | new->next = next; | |
58 | new->prev = prev; | |
59 | prev->next = new; | |
60 | } | |
61 | ||
62 | static inline void list_add(struct list_head *new, struct list_head *head) | |
63 | { | |
64 | __list_add(new, head, head->next); | |
65 | } | |
66 | ||
67 | static inline void __list_del(struct list_head *prev, struct list_head *next) | |
68 | { | |
69 | next->prev = prev; | |
70 | prev->next = next; | |
71 | } | |
72 | ||
73 | static inline void __list_del_entry(struct list_head *entry) | |
74 | { | |
75 | __list_del(entry->prev, entry->next); | |
76 | } | |
77 | ||
/*
 * Take `list` out of the list it is currently on and re-insert it at the
 * front of the list headed by `head`.
 */
static inline void list_move(struct list_head *list, struct list_head *head)
{
	/* Detach from the current position ... */
	__list_del_entry(list);
	/* ... and re-attach directly after the head. */
	list_add(list, head);
}
83 | ||
/* Object of type `type` whose list_head field `member` is at `ptr`. */
#define list_entry(ptr, type, member) container_of(ptr, type, member)

/* Object at the tail of the list headed by `ptr` (list must be non-empty). */
#define list_last_entry(ptr, type, member) list_entry((ptr)->prev, type, member)
89 | ||
90 | struct pfect_lru_node { | |
91 | struct list_head list; | |
92 | unsigned long long key; | |
93 | }; | |
94 | ||
95 | struct pfect_lru { | |
96 | struct list_head list; | |
97 | struct pfect_lru_node *free_nodes; | |
98 | unsigned int cur_size; | |
99 | unsigned int lru_size; | |
100 | unsigned int nr_unique; | |
101 | unsigned int nr_misses; | |
102 | unsigned int total; | |
103 | int map_fd; | |
104 | }; | |
105 | ||
106 | static void pfect_lru_init(struct pfect_lru *lru, unsigned int lru_size, | |
107 | unsigned int nr_possible_elems) | |
108 | { | |
109 | lru->map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, | |
110 | sizeof(unsigned long long), | |
111 | sizeof(struct pfect_lru_node *), | |
112 | nr_possible_elems, 0); | |
113 | assert(lru->map_fd != -1); | |
114 | ||
115 | lru->free_nodes = malloc(lru_size * sizeof(struct pfect_lru_node)); | |
116 | assert(lru->free_nodes); | |
117 | ||
118 | INIT_LIST_HEAD(&lru->list); | |
119 | lru->cur_size = 0; | |
120 | lru->lru_size = lru_size; | |
121 | lru->nr_unique = lru->nr_misses = lru->total = 0; | |
122 | } | |
123 | ||
124 | static void pfect_lru_destroy(struct pfect_lru *lru) | |
125 | { | |
126 | close(lru->map_fd); | |
127 | free(lru->free_nodes); | |
128 | } | |
129 | ||
130 | static int pfect_lru_lookup_or_insert(struct pfect_lru *lru, | |
131 | unsigned long long key) | |
132 | { | |
133 | struct pfect_lru_node *node = NULL; | |
134 | int seen = 0; | |
135 | ||
136 | lru->total++; | |
137 | if (!bpf_lookup_elem(lru->map_fd, &key, &node)) { | |
138 | if (node) { | |
139 | list_move(&node->list, &lru->list); | |
140 | return 1; | |
141 | } | |
142 | seen = 1; | |
143 | } | |
144 | ||
145 | if (lru->cur_size < lru->lru_size) { | |
146 | node = &lru->free_nodes[lru->cur_size++]; | |
147 | INIT_LIST_HEAD(&node->list); | |
148 | } else { | |
149 | struct pfect_lru_node *null_node = NULL; | |
150 | ||
151 | node = list_last_entry(&lru->list, | |
152 | struct pfect_lru_node, | |
153 | list); | |
154 | bpf_update_elem(lru->map_fd, &node->key, &null_node, BPF_EXIST); | |
155 | } | |
156 | ||
157 | node->key = key; | |
158 | list_move(&node->list, &lru->list); | |
159 | ||
160 | lru->nr_misses++; | |
161 | if (seen) { | |
162 | assert(!bpf_update_elem(lru->map_fd, &key, &node, BPF_EXIST)); | |
163 | } else { | |
164 | lru->nr_unique++; | |
165 | assert(!bpf_update_elem(lru->map_fd, &key, &node, BPF_NOEXIST)); | |
166 | } | |
167 | ||
168 | return seen; | |
169 | } | |
170 | ||
/*
 * Load the key sequence from a text file holding one decimal key per line.
 *
 * @dist_file: path of the file to read
 * @keys:      receives a malloc()ed array of the parsed keys; the caller
 *             owns it and must free() it (it is allocated even when no
 *             key is found)
 *
 * Empty lines are skipped.  Returns the number of keys stored in *keys.
 * Aborts (assert) if the file cannot be opened or read, or on allocation
 * failure.
 */
static unsigned int read_keys(const char *dist_file,
			      unsigned long long **keys)
{
	struct stat fst;
	unsigned long long *retkeys;
	unsigned int counts = 0;
	size_t size, len = 0, i;
	int dist_fd;
	int err;
	char *b, *l;

	dist_fd = open(dist_file, O_RDONLY);
	assert(dist_fd != -1);

	/* Syscalls are kept out of assert() so they survive NDEBUG builds. */
	err = fstat(dist_fd, &fst);
	assert(err == 0);
	assert(fst.st_size >= 0);
	size = (size_t)fst.st_size;

	/* One extra byte for the terminator that strtok() relies on. */
	b = malloc(size + 1);
	assert(b);

	/* read() may legitimately return less than requested. */
	while (len < size) {
		ssize_t got = read(dist_fd, b + len, size - len);

		if (got <= 0)
			break;
		len += (size_t)got;
	}
	close(dist_fd);
	assert(len == size);
	b[len] = '\0';

	for (i = 0; i < len; i++) {
		if (b[i] == '\n')
			counts++;
	}
	counts++; /* in case the last line has no \n */

	retkeys = calloc(counts, sizeof(*retkeys));
	assert(retkeys);

	/* The number of tokens can never exceed the line count above. */
	counts = 0;
	for (l = strtok(b, "\n"); l; l = strtok(NULL, "\n"))
		retkeys[counts++] = strtoull(l, NULL, 10);
	free(b);

	*keys = retkeys;

	return counts;
}
208 | ||
/*
 * Create a BPF map of `map_type` with 64-bit keys and 64-bit values.
 *
 * @map_type:  map type passed to bpf_create_map()
 * @map_flags: creation flags
 * @size:      maximum number of entries
 *
 * Returns the map fd, or -1 after reporting the error with perror().
 */
static int create_map(int map_type, int map_flags, unsigned int size)
{
	int fd = bpf_create_map(map_type, sizeof(unsigned long long),
				sizeof(unsigned long long), size, map_flags);

	if (fd != -1)
		return fd;

	perror("bpf_create_map");
	return fd;
}
221 | ||
222 | static int sched_next_online(int pid, int next_to_try) | |
223 | { | |
224 | cpu_set_t cpuset; | |
225 | ||
226 | if (next_to_try == nr_cpus) | |
227 | return -1; | |
228 | ||
229 | while (next_to_try < nr_cpus) { | |
230 | CPU_ZERO(&cpuset); | |
231 | CPU_SET(next_to_try++, &cpuset); | |
232 | if (!sched_setaffinity(pid, sizeof(cpuset), &cpuset)) | |
233 | break; | |
234 | } | |
235 | ||
236 | return next_to_try; | |
237 | } | |
238 | ||
/*
 * Fork `tasks` child processes, run fn(i, data) in the i-th child and wait
 * for all of them.
 *
 * Each child is pinned to a CPU through sched_next_online() before fn()
 * runs.  The function aborts (assert) unless every child terminates with
 * status 0, and exits with status 1 if a fork() fails.  Nothing is done
 * when `tasks` is 0.
 */
static void run_parallel(unsigned int tasks, void (*fn)(int i, void *data),
			 void *data)
{
	int next_sched_cpu = 0;
	unsigned int i;
	pid_t *pid;

	/* A zero-length array would be undefined behaviour; nothing to run. */
	if (!tasks)
		return;

	pid = calloc(tasks, sizeof(*pid));
	assert(pid);

	for (i = 0; i < tasks; i++) {
		pid[i] = fork();
		if (pid[i] == 0) {
			next_sched_cpu = sched_next_online(0, next_sched_cpu);
			fn((int)i, data);
			exit(0);
		} else if (pid[i] == -1) {
			printf("couldn't spawn #%d process\n", (int)i);
			exit(1);
		}
		/* It is mostly redundant and just allows the parent
		 * process to update next_sched_cpu for the next child
		 * process
		 */
		next_sched_cpu = sched_next_online(pid[i], next_sched_cpu);
	}

	for (i = 0; i < tasks; i++) {
		pid_t waited;
		int status;

		/* Keep waitpid() outside assert() so NDEBUG builds still reap. */
		waited = waitpid(pid[i], &status, 0);
		assert(waited == pid[i]);
		assert(status == 0);
	}

	free(pid);
}
269 | ||
270 | static void do_test_lru_dist(int task, void *data) | |
271 | { | |
272 | unsigned int nr_misses = 0; | |
273 | struct pfect_lru pfect_lru; | |
274 | unsigned long long key, value = 1234; | |
275 | unsigned int i; | |
276 | ||
277 | unsigned int lru_map_fd = ((unsigned int *)data)[0]; | |
278 | unsigned int lru_size = ((unsigned int *)data)[1]; | |
279 | unsigned long long key_offset = task * dist_key_counts; | |
280 | ||
281 | pfect_lru_init(&pfect_lru, lru_size, dist_key_counts); | |
282 | ||
283 | for (i = 0; i < dist_key_counts; i++) { | |
284 | key = dist_keys[i] + key_offset; | |
285 | ||
286 | pfect_lru_lookup_or_insert(&pfect_lru, key); | |
287 | ||
288 | if (!bpf_lookup_elem(lru_map_fd, &key, &value)) | |
289 | continue; | |
290 | ||
291 | if (bpf_update_elem(lru_map_fd, &key, &value, BPF_NOEXIST)) { | |
292 | printf("bpf_update_elem(lru_map_fd, %llu): errno:%d\n", | |
293 | key, errno); | |
294 | assert(0); | |
295 | } | |
296 | ||
297 | nr_misses++; | |
298 | } | |
299 | ||
300 | printf(" task:%d BPF LRU: nr_unique:%u(/%u) nr_misses:%u(/%u)\n", | |
301 | task, pfect_lru.nr_unique, dist_key_counts, nr_misses, | |
302 | dist_key_counts); | |
303 | printf(" task:%d Perfect LRU: nr_unique:%u(/%u) nr_misses:%u(/%u)\n", | |
304 | task, pfect_lru.nr_unique, pfect_lru.total, | |
305 | pfect_lru.nr_misses, pfect_lru.total); | |
306 | ||
307 | pfect_lru_destroy(&pfect_lru); | |
308 | close(lru_map_fd); | |
309 | } | |
310 | ||
311 | static void test_parallel_lru_dist(int map_type, int map_flags, | |
312 | int nr_tasks, unsigned int lru_size) | |
313 | { | |
314 | int child_data[2]; | |
315 | int lru_map_fd; | |
316 | ||
317 | printf("%s (map_type:%d map_flags:0x%X):\n", __func__, map_type, | |
318 | map_flags); | |
319 | ||
320 | if (map_flags & BPF_F_NO_COMMON_LRU) | |
321 | lru_map_fd = create_map(map_type, map_flags, | |
322 | nr_cpus * lru_size); | |
323 | else | |
324 | lru_map_fd = create_map(map_type, map_flags, | |
325 | nr_tasks * lru_size); | |
326 | assert(lru_map_fd != -1); | |
327 | ||
328 | child_data[0] = lru_map_fd; | |
329 | child_data[1] = lru_size; | |
330 | ||
331 | run_parallel(nr_tasks, do_test_lru_dist, child_data); | |
332 | ||
333 | close(lru_map_fd); | |
334 | } | |
335 | ||
336 | static void test_lru_loss0(int map_type, int map_flags) | |
337 | { | |
338 | unsigned long long key, value[nr_cpus]; | |
339 | unsigned int old_unused_losses = 0; | |
340 | unsigned int new_unused_losses = 0; | |
341 | unsigned int used_losses = 0; | |
342 | int map_fd; | |
343 | ||
344 | printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type, | |
345 | map_flags); | |
346 | ||
347 | assert(sched_next_online(0, 0) != -1); | |
348 | ||
349 | if (map_flags & BPF_F_NO_COMMON_LRU) | |
350 | map_fd = create_map(map_type, map_flags, 900 * nr_cpus); | |
351 | else | |
352 | map_fd = create_map(map_type, map_flags, 900); | |
353 | ||
354 | assert(map_fd != -1); | |
355 | ||
356 | value[0] = 1234; | |
357 | ||
358 | for (key = 1; key <= 1000; key++) { | |
359 | int start_key, end_key; | |
360 | ||
361 | assert(bpf_update_elem(map_fd, &key, value, BPF_NOEXIST) == 0); | |
362 | ||
363 | start_key = 101; | |
364 | end_key = min(key, 900); | |
365 | ||
366 | while (start_key <= end_key) { | |
367 | bpf_lookup_elem(map_fd, &start_key, value); | |
368 | start_key++; | |
369 | } | |
370 | } | |
371 | ||
372 | for (key = 1; key <= 1000; key++) { | |
373 | if (bpf_lookup_elem(map_fd, &key, value)) { | |
374 | if (key <= 100) | |
375 | old_unused_losses++; | |
376 | else if (key <= 900) | |
377 | used_losses++; | |
378 | else | |
379 | new_unused_losses++; | |
380 | } | |
381 | } | |
382 | ||
383 | close(map_fd); | |
384 | ||
385 | printf("older-elem-losses:%d(/100) active-elem-losses:%d(/800) " | |
386 | "newer-elem-losses:%d(/100)\n", | |
387 | old_unused_losses, used_losses, new_unused_losses); | |
388 | } | |
389 | ||
390 | static void test_lru_loss1(int map_type, int map_flags) | |
391 | { | |
392 | unsigned long long key, value[nr_cpus]; | |
393 | int map_fd; | |
394 | unsigned int nr_losses = 0; | |
395 | ||
396 | printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type, | |
397 | map_flags); | |
398 | ||
399 | assert(sched_next_online(0, 0) != -1); | |
400 | ||
401 | if (map_flags & BPF_F_NO_COMMON_LRU) | |
402 | map_fd = create_map(map_type, map_flags, 1000 * nr_cpus); | |
403 | else | |
404 | map_fd = create_map(map_type, map_flags, 1000); | |
405 | ||
406 | assert(map_fd != -1); | |
407 | ||
408 | value[0] = 1234; | |
409 | ||
410 | for (key = 1; key <= 1000; key++) | |
411 | assert(!bpf_update_elem(map_fd, &key, value, BPF_NOEXIST)); | |
412 | ||
413 | for (key = 1; key <= 1000; key++) { | |
414 | if (bpf_lookup_elem(map_fd, &key, value)) | |
415 | nr_losses++; | |
416 | } | |
417 | ||
418 | close(map_fd); | |
419 | ||
420 | printf("nr_losses:%d(/1000)\n", nr_losses); | |
421 | } | |
422 | ||
423 | static void do_test_parallel_lru_loss(int task, void *data) | |
424 | { | |
425 | const unsigned int nr_stable_elems = 1000; | |
426 | const unsigned int nr_repeats = 100000; | |
427 | ||
428 | int map_fd = *(int *)data; | |
429 | unsigned long long stable_base; | |
430 | unsigned long long key, value[nr_cpus]; | |
431 | unsigned long long next_ins_key; | |
432 | unsigned int nr_losses = 0; | |
433 | unsigned int i; | |
434 | ||
435 | stable_base = task * nr_repeats * 2 + 1; | |
436 | next_ins_key = stable_base; | |
437 | value[0] = 1234; | |
438 | for (i = 0; i < nr_stable_elems; i++) { | |
439 | assert(bpf_update_elem(map_fd, &next_ins_key, value, | |
440 | BPF_NOEXIST) == 0); | |
441 | next_ins_key++; | |
442 | } | |
443 | ||
444 | for (i = 0; i < nr_repeats; i++) { | |
445 | int rn; | |
446 | ||
447 | rn = rand(); | |
448 | ||
449 | if (rn % 10) { | |
450 | key = rn % nr_stable_elems + stable_base; | |
451 | bpf_lookup_elem(map_fd, &key, value); | |
452 | } else { | |
453 | bpf_update_elem(map_fd, &next_ins_key, value, | |
454 | BPF_NOEXIST); | |
455 | next_ins_key++; | |
456 | } | |
457 | } | |
458 | ||
459 | key = stable_base; | |
460 | for (i = 0; i < nr_stable_elems; i++) { | |
461 | if (bpf_lookup_elem(map_fd, &key, value)) | |
462 | nr_losses++; | |
463 | key++; | |
464 | } | |
465 | ||
466 | printf(" task:%d nr_losses:%u\n", task, nr_losses); | |
467 | } | |
468 | ||
469 | static void test_parallel_lru_loss(int map_type, int map_flags, int nr_tasks) | |
470 | { | |
471 | int map_fd; | |
472 | ||
473 | printf("%s (map_type:%d map_flags:0x%X):\n", __func__, map_type, | |
474 | map_flags); | |
475 | ||
476 | /* Give 20% more than the active working set */ | |
477 | if (map_flags & BPF_F_NO_COMMON_LRU) | |
478 | map_fd = create_map(map_type, map_flags, | |
479 | nr_cpus * (1000 + 200)); | |
480 | else | |
481 | map_fd = create_map(map_type, map_flags, | |
482 | nr_tasks * (1000 + 200)); | |
483 | ||
484 | assert(map_fd != -1); | |
485 | ||
486 | run_parallel(nr_tasks, do_test_parallel_lru_loss, &map_fd); | |
487 | ||
488 | close(map_fd); | |
489 | } | |
490 | ||
491 | int main(int argc, char **argv) | |
492 | { | |
493 | struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; | |
494 | int map_flags[] = {0, BPF_F_NO_COMMON_LRU}; | |
495 | const char *dist_file; | |
496 | int nr_tasks = 1; | |
497 | int lru_size; | |
498 | int f; | |
499 | ||
500 | if (argc < 4) { | |
501 | printf("Usage: %s <dist-file> <lru-size> <nr-tasks>\n", | |
502 | argv[0]); | |
503 | return -1; | |
504 | } | |
505 | ||
506 | dist_file = argv[1]; | |
507 | lru_size = atoi(argv[2]); | |
508 | nr_tasks = atoi(argv[3]); | |
509 | ||
510 | setbuf(stdout, NULL); | |
511 | ||
512 | assert(!setrlimit(RLIMIT_MEMLOCK, &r)); | |
513 | ||
514 | srand(time(NULL)); | |
515 | ||
e00c7b21 | 516 | nr_cpus = bpf_num_possible_cpus(); |
5db58faf MKL |
517 | assert(nr_cpus != -1); |
518 | printf("nr_cpus:%d\n\n", nr_cpus); | |
519 | ||
520 | nr_tasks = min(nr_tasks, nr_cpus); | |
521 | ||
522 | dist_key_counts = read_keys(dist_file, &dist_keys); | |
523 | if (!dist_key_counts) { | |
524 | printf("%s has no key\n", dist_file); | |
525 | return -1; | |
526 | } | |
527 | ||
528 | for (f = 0; f < sizeof(map_flags) / sizeof(*map_flags); f++) { | |
529 | test_lru_loss0(BPF_MAP_TYPE_LRU_HASH, map_flags[f]); | |
530 | test_lru_loss1(BPF_MAP_TYPE_LRU_HASH, map_flags[f]); | |
531 | test_parallel_lru_loss(BPF_MAP_TYPE_LRU_HASH, map_flags[f], | |
532 | nr_tasks); | |
533 | test_parallel_lru_dist(BPF_MAP_TYPE_LRU_HASH, map_flags[f], | |
534 | nr_tasks, lru_size); | |
535 | printf("\n"); | |
536 | } | |
537 | ||
538 | free(dist_keys); | |
539 | ||
540 | return 0; | |
541 | } |