]>
Commit | Line | Data |
---|---|---|
f91f0fd5 TL |
1 | #include "ec_base_vsx.h" |
2 | ||
3 | void gf_2vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, | |
4 | unsigned char **src, unsigned char **dest) | |
5 | { | |
6 | unsigned char *s, *t0, *t1; | |
7 | vector unsigned char vX1, vX2, vX3, vX4; | |
8 | vector unsigned char vY1, vY2, vY3, vY4; | |
9 | vector unsigned char vYD, vYE, vYF, vYG; | |
10 | vector unsigned char vhi0, vlo0, vhi1, vlo1; | |
11 | int i, j, head; | |
12 | ||
13 | if (vlen < 128) { | |
14 | gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *)dest[0]); | |
15 | gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *)dest[1]); | |
16 | ||
17 | for (j = 1; j < vlen; j++) { | |
18 | gf_2vect_mad_vsx(len, vlen, j, gftbls, src[j], dest); | |
19 | } | |
20 | return; | |
21 | } | |
22 | ||
23 | t0 = (unsigned char *)dest[0]; | |
24 | t1 = (unsigned char *)dest[1]; | |
25 | ||
26 | head = len % 64; | |
27 | if (head != 0) { | |
28 | gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0); | |
29 | gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1); | |
30 | } | |
31 | ||
32 | for (i = head; i < len - 63; i += 64) { | |
33 | vY1 = vY1 ^ vY1; | |
34 | vY2 = vY2 ^ vY2; | |
35 | vY3 = vY3 ^ vY3; | |
36 | vY4 = vY4 ^ vY4; | |
37 | ||
38 | vYD = vYD ^ vYD; | |
39 | vYE = vYE ^ vYE; | |
40 | vYF = vYF ^ vYF; | |
41 | vYG = vYG ^ vYG; | |
42 | ||
43 | unsigned char *g0 = &gftbls[0 * 32 * vlen]; | |
44 | unsigned char *g1 = &gftbls[1 * 32 * vlen]; | |
45 | ||
46 | for (j = 0; j < vlen; j++) { | |
47 | s = (unsigned char *)src[j]; | |
48 | vX1 = vec_xl(0, s + i); | |
49 | vX2 = vec_xl(16, s + i); | |
50 | vX3 = vec_xl(32, s + i); | |
51 | vX4 = vec_xl(48, s + i); | |
52 | ||
53 | vlo0 = EC_vec_xl(0, g0); | |
54 | vhi0 = EC_vec_xl(16, g0); | |
55 | vlo1 = EC_vec_xl(0, g1); | |
56 | vhi1 = EC_vec_xl(16, g1); | |
57 | ||
58 | vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1); | |
59 | vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2); | |
60 | vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3); | |
61 | vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4); | |
62 | ||
63 | vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1); | |
64 | vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2); | |
65 | vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3); | |
66 | vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4); | |
67 | ||
68 | g0 += 32; | |
69 | g1 += 32; | |
70 | } | |
71 | ||
72 | vec_xst(vY1, 0, t0 + i); | |
73 | vec_xst(vY2, 16, t0 + i); | |
74 | vec_xst(vY3, 0, t1 + i); | |
75 | vec_xst(vY4, 16, t1 + i); | |
76 | ||
77 | vec_xst(vYD, 32, t0 + i); | |
78 | vec_xst(vYE, 48, t0 + i); | |
79 | vec_xst(vYF, 32, t1 + i); | |
80 | vec_xst(vYG, 48, t1 + i); | |
81 | } | |
82 | return; | |
83 | } |