/*
 * gf_vect_dot_prod_vsx - GF(2^8) vector dot product, PowerPC VSX implementation.
 */
1 | #include "ec_base_vsx.h" |
2 | ||
3 | void gf_vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, | |
4 | unsigned char **src, unsigned char *dest) | |
5 | { | |
6 | unsigned char *s, *t0; | |
7 | vector unsigned char vX1, vY1; | |
8 | vector unsigned char vX2, vY2; | |
9 | vector unsigned char vX3, vY3; | |
10 | vector unsigned char vX4, vY4; | |
11 | vector unsigned char vX5, vY5; | |
12 | vector unsigned char vX6, vY6; | |
13 | vector unsigned char vX7, vY7; | |
14 | vector unsigned char vX8, vY8; | |
15 | vector unsigned char vhi0, vlo0; | |
16 | int i, j, head; | |
17 | ||
18 | if (vlen < 128) { | |
19 | gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *)dest); | |
20 | ||
21 | for (j = 1; j < vlen; j++) { | |
22 | gf_vect_mad_vsx(len, vlen, j, gftbls, src[j], dest); | |
23 | } | |
24 | return; | |
25 | } | |
26 | ||
27 | t0 = (unsigned char *)dest; | |
28 | ||
29 | head = len % 128; | |
30 | if (head != 0) { | |
31 | gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0); | |
32 | } | |
33 | ||
34 | for (i = head; i < len - 127; i += 128) { | |
35 | vY1 = vY1 ^ vY1; | |
36 | vY2 = vY2 ^ vY2; | |
37 | vY3 = vY3 ^ vY3; | |
38 | vY4 = vY4 ^ vY4; | |
39 | ||
40 | vY5 = vY5 ^ vY5; | |
41 | vY6 = vY6 ^ vY6; | |
42 | vY7 = vY7 ^ vY7; | |
43 | vY8 = vY8 ^ vY8; | |
44 | ||
45 | unsigned char *g0 = &gftbls[0 * 32 * vlen]; | |
46 | ||
47 | for (j = 0; j < vlen; j++) { | |
48 | s = (unsigned char *)src[j]; | |
49 | vX1 = vec_xl(0, s + i); | |
50 | vX2 = vec_xl(16, s + i); | |
51 | vX3 = vec_xl(32, s + i); | |
52 | vX4 = vec_xl(48, s + i); | |
53 | ||
54 | vlo0 = EC_vec_xl(0, g0); | |
55 | vhi0 = EC_vec_xl(16, g0); | |
56 | ||
57 | vX5 = vec_xl(64, s + i); | |
58 | vX6 = vec_xl(80, s + i); | |
59 | vX7 = vec_xl(96, s + i); | |
60 | vX8 = vec_xl(112, s + i); | |
61 | ||
62 | vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1); | |
63 | vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2); | |
64 | vY3 = vY3 ^ EC_vec_permxor(vhi0, vlo0, vX3); | |
65 | vY4 = vY4 ^ EC_vec_permxor(vhi0, vlo0, vX4); | |
66 | ||
67 | vY5 = vY5 ^ EC_vec_permxor(vhi0, vlo0, vX5); | |
68 | vY6 = vY6 ^ EC_vec_permxor(vhi0, vlo0, vX6); | |
69 | vY7 = vY7 ^ EC_vec_permxor(vhi0, vlo0, vX7); | |
70 | vY8 = vY8 ^ EC_vec_permxor(vhi0, vlo0, vX8); | |
71 | ||
72 | g0 += 32; | |
73 | } | |
74 | vec_xst(vY1, 0, t0 + i); | |
75 | vec_xst(vY2, 16, t0 + i); | |
76 | vec_xst(vY3, 32, t0 + i); | |
77 | vec_xst(vY4, 48, t0 + i); | |
78 | ||
79 | vec_xst(vY5, 64, t0 + i); | |
80 | vec_xst(vY6, 80, t0 + i); | |
81 | vec_xst(vY7, 96, t0 + i); | |
82 | vec_xst(vY8, 112, t0 + i); | |
83 | } | |
84 | return; | |
85 | } |