]>
Commit | Line | Data |
---|---|---|
11fdf7f2 TL |
1 | /* SPDX-License-Identifier: BSD-3-Clause |
2 | * Copyright(c) 2017 Cavium, Inc | |
3 | */ | |
7c673cae FG |
4 | /* |
5 | * Reciprocal divide | |
6 | * | |
7 | * Used with permission from original authors | |
8 | * Hannes Frederic Sowa and Daniel Borkmann | |
9 | * | |
10 | * This algorithm is based on the paper "Division by Invariant | |
11 | * Integers Using Multiplication" by Torbjörn Granlund and Peter | |
12 | * L. Montgomery. | |
13 | * | |
14 | * The assembler implementation from Agner Fog, which this code is | |
15 | * based on, can be found here: | |
16 | * http://www.agner.org/optimize/asmlib.zip | |
17 | * | |
18 | * This optimization for A/B is helpful if the divisor B is mostly | |
19 | * runtime invariant. The reciprocal of B is calculated in the | |
20 | * slow-path with reciprocal_value(). The fast-path can then just use | |
21 | * a much faster multiplication operation with a variable dividend A | |
22 | * to calculate the division A/B. | |
23 | */ | |
24 | ||
25 | #ifndef _RTE_RECIPROCAL_H_ | |
26 | #define _RTE_RECIPROCAL_H_ | |
27 | ||
28 | #include <stdint.h> | |
29 | ||
30 | struct rte_reciprocal { | |
31 | uint32_t m; | |
32 | uint8_t sh1, sh2; | |
33 | }; | |
34 | ||
11fdf7f2 TL |
35 | struct rte_reciprocal_u64 { |
36 | uint64_t m; | |
37 | uint8_t sh1, sh2; | |
38 | }; | |
39 | ||
7c673cae FG |
40 | static inline uint32_t rte_reciprocal_divide(uint32_t a, struct rte_reciprocal R) |
41 | { | |
42 | uint32_t t = (uint32_t)(((uint64_t)a * R.m) >> 32); | |
43 | ||
44 | return (t + ((a - t) >> R.sh1)) >> R.sh2; | |
45 | } | |
46 | ||
11fdf7f2 TL |
47 | static __rte_always_inline uint64_t |
48 | mullhi_u64(uint64_t x, uint64_t y) | |
49 | { | |
50 | #ifdef __SIZEOF_INT128__ | |
51 | __uint128_t xl = x; | |
52 | __uint128_t rl = xl * y; | |
53 | ||
54 | return (rl >> 64); | |
55 | #else | |
56 | uint64_t u0, u1, v0, v1, k, t; | |
57 | uint64_t w1, w2; | |
58 | uint64_t whi; | |
59 | ||
60 | u1 = x >> 32; u0 = x & 0xFFFFFFFF; | |
61 | v1 = y >> 32; v0 = y & 0xFFFFFFFF; | |
62 | ||
63 | t = u0*v0; | |
64 | k = t >> 32; | |
65 | ||
66 | t = u1*v0 + k; | |
67 | w1 = t & 0xFFFFFFFF; | |
68 | w2 = t >> 32; | |
69 | ||
70 | t = u0*v1 + w1; | |
71 | k = t >> 32; | |
72 | ||
73 | whi = u1*v1 + w2 + k; | |
74 | ||
75 | return whi; | |
76 | #endif | |
77 | } | |
78 | ||
79 | static __rte_always_inline uint64_t | |
9f95a23c | 80 | rte_reciprocal_divide_u64(uint64_t a, const struct rte_reciprocal_u64 *R) |
11fdf7f2 TL |
81 | { |
82 | uint64_t t = mullhi_u64(a, R->m); | |
83 | ||
84 | return (t + ((a - t) >> R->sh1)) >> R->sh2; | |
85 | } | |
86 | ||
7c673cae | 87 | struct rte_reciprocal rte_reciprocal_value(uint32_t d); |
11fdf7f2 | 88 | struct rte_reciprocal_u64 rte_reciprocal_value_u64(uint64_t d); |
7c673cae FG |
89 | |
90 | #endif /* _RTE_RECIPROCAL_H_ */ |