]>
Commit | Line | Data |
---|---|---|
bd277fa1 RH |
1 | /* |
2 | * Loongson Multimedia Instruction emulation helpers for QEMU. | |
3 | * | |
4 | * Copyright (c) 2011 Richard Henderson <rth@twiddle.net> | |
5 | * | |
6 | * This library is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU Lesser General Public | |
8 | * License as published by the Free Software Foundation; either | |
9 | * version 2 of the License, or (at your option) any later version. | |
10 | * | |
11 | * This library is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | * Lesser General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU Lesser General Public | |
17 | * License along with this library; if not, see <http://www.gnu.org/licenses/>. | |
18 | */ | |
19 | ||
c684822a | 20 | #include "qemu/osdep.h" |
bd277fa1 | 21 | #include "cpu.h" |
2ef6175a | 22 | #include "exec/helper-proto.h" |
bd277fa1 RH |
23 | |
/*
 * A 64-bit multimedia register value viewed as lanes of different widths.
 *
 * Lane indices follow host memory order, so the lanes can be used directly
 * only when every column is treated identically.  When the position of a
 * lane matters, the index has to be corrected for host byte order.
 */
typedef union {
    uint64_t d;      /* the whole 64-bit value */
    uint32_t uw[2];  /* two unsigned words */
    int32_t  sw[2];  /* two signed words */
    uint16_t uh[4];  /* four unsigned halfwords */
    int16_t  sh[4];  /* four signed halfwords */
    uint8_t  ub[8];  /* eight unsigned bytes */
    int8_t   sb[8];  /* eight signed bytes */
} LMIValue;
36 | ||
/*
 * Lane-index correction for helpers whose result depends on lane position.
 * Callers pass (number of lanes - 1) and XOR the result into every lane
 * index: on big-endian hosts this reverses the lane order, on little-endian
 * hosts it is a no-op.  The argument is parenthesized so that an expression
 * argument cannot re-associate with the surrounding XOR.
 */
#ifdef HOST_WORDS_BIGENDIAN
# define BYTE_ORDER_XOR(N) (N)
#else
# define BYTE_ORDER_XOR(N) 0
#endif
43 | ||
/*
 * Saturation helpers.  The signed variants clamp to both ends of the target
 * range.  The unsigned variants clamp only the upper bound, so a caller that
 * can produce a negative intermediate must clamp at zero itself.
 *
 * Every use of the argument is parenthesized so that expression arguments
 * (e.g. SATUB(a | b)) are compared as a whole.  The argument is evaluated
 * more than once: do not pass expressions with side effects.
 */
#define SATSB(x)  ((x) < -0x80 ? -0x80 : (x) > 0x7f ? 0x7f : (x))
#define SATUB(x)  ((x) > 0xff ? 0xff : (x))

#define SATSH(x)  ((x) < -0x8000 ? -0x8000 : (x) > 0x7fff ? 0x7fff : (x))
#define SATUH(x)  ((x) > 0xffff ? 0xffff : (x))

#define SATSW(x) \
    ((x) < -0x80000000ll ? -0x80000000ll : (x) > 0x7fffffff ? 0x7fffffff : (x))
#define SATUW(x)  ((x) > 0xffffffffull ? 0xffffffffull : (x))
53 | ||
54 | uint64_t helper_paddsb(uint64_t fs, uint64_t ft) | |
55 | { | |
56 | LMIValue vs, vt; | |
57 | unsigned int i; | |
58 | ||
59 | vs.d = fs; | |
60 | vt.d = ft; | |
61 | for (i = 0; i < 8; ++i) { | |
62 | int r = vs.sb[i] + vt.sb[i]; | |
63 | vs.sb[i] = SATSB(r); | |
64 | } | |
65 | return vs.d; | |
66 | } | |
67 | ||
68 | uint64_t helper_paddusb(uint64_t fs, uint64_t ft) | |
69 | { | |
70 | LMIValue vs, vt; | |
71 | unsigned int i; | |
72 | ||
73 | vs.d = fs; | |
74 | vt.d = ft; | |
75 | for (i = 0; i < 8; ++i) { | |
76 | int r = vs.ub[i] + vt.ub[i]; | |
77 | vs.ub[i] = SATUB(r); | |
78 | } | |
79 | return vs.d; | |
80 | } | |
81 | ||
82 | uint64_t helper_paddsh(uint64_t fs, uint64_t ft) | |
83 | { | |
84 | LMIValue vs, vt; | |
85 | unsigned int i; | |
86 | ||
87 | vs.d = fs; | |
88 | vt.d = ft; | |
89 | for (i = 0; i < 4; ++i) { | |
90 | int r = vs.sh[i] + vt.sh[i]; | |
91 | vs.sh[i] = SATSH(r); | |
92 | } | |
93 | return vs.d; | |
94 | } | |
95 | ||
96 | uint64_t helper_paddush(uint64_t fs, uint64_t ft) | |
97 | { | |
98 | LMIValue vs, vt; | |
99 | unsigned int i; | |
100 | ||
101 | vs.d = fs; | |
102 | vt.d = ft; | |
103 | for (i = 0; i < 4; ++i) { | |
104 | int r = vs.uh[i] + vt.uh[i]; | |
105 | vs.uh[i] = SATUH(r); | |
106 | } | |
107 | return vs.d; | |
108 | } | |
109 | ||
110 | uint64_t helper_paddb(uint64_t fs, uint64_t ft) | |
111 | { | |
112 | LMIValue vs, vt; | |
113 | unsigned int i; | |
114 | ||
115 | vs.d = fs; | |
116 | vt.d = ft; | |
117 | for (i = 0; i < 8; ++i) { | |
118 | vs.ub[i] += vt.ub[i]; | |
119 | } | |
120 | return vs.d; | |
121 | } | |
122 | ||
123 | uint64_t helper_paddh(uint64_t fs, uint64_t ft) | |
124 | { | |
125 | LMIValue vs, vt; | |
126 | unsigned int i; | |
127 | ||
128 | vs.d = fs; | |
129 | vt.d = ft; | |
130 | for (i = 0; i < 4; ++i) { | |
131 | vs.uh[i] += vt.uh[i]; | |
132 | } | |
133 | return vs.d; | |
134 | } | |
135 | ||
136 | uint64_t helper_paddw(uint64_t fs, uint64_t ft) | |
137 | { | |
138 | LMIValue vs, vt; | |
139 | unsigned int i; | |
140 | ||
141 | vs.d = fs; | |
142 | vt.d = ft; | |
143 | for (i = 0; i < 2; ++i) { | |
144 | vs.uw[i] += vt.uw[i]; | |
145 | } | |
146 | return vs.d; | |
147 | } | |
148 | ||
149 | uint64_t helper_psubsb(uint64_t fs, uint64_t ft) | |
150 | { | |
151 | LMIValue vs, vt; | |
152 | unsigned int i; | |
153 | ||
154 | vs.d = fs; | |
155 | vt.d = ft; | |
156 | for (i = 0; i < 8; ++i) { | |
157 | int r = vs.sb[i] - vt.sb[i]; | |
158 | vs.sb[i] = SATSB(r); | |
159 | } | |
160 | return vs.d; | |
161 | } | |
162 | ||
/*
 * Unsigned saturating subtract on eight byte lanes: each result byte is
 * (fs byte - ft byte), clamped to 0 when the ft byte is the larger one.
 *
 * The previous implementation only clamped the upper bound, so a negative
 * difference was truncated on store (1 - 2 produced 0xff) instead of
 * saturating to 0.  Lanes are extracted with shifts, so the result does not
 * depend on host byte order.
 */
uint64_t helper_psubusb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8) {
        unsigned int a = (fs >> shift) & 0xff;
        unsigned int b = (ft >> shift) & 0xff;
        unsigned int r = a > b ? a - b : 0;

        fd |= (uint64_t)r << shift;
    }
    return fd;
}
176 | ||
177 | uint64_t helper_psubsh(uint64_t fs, uint64_t ft) | |
178 | { | |
179 | LMIValue vs, vt; | |
180 | unsigned int i; | |
181 | ||
182 | vs.d = fs; | |
183 | vt.d = ft; | |
184 | for (i = 0; i < 4; ++i) { | |
185 | int r = vs.sh[i] - vt.sh[i]; | |
186 | vs.sh[i] = SATSH(r); | |
187 | } | |
188 | return vs.d; | |
189 | } | |
190 | ||
/*
 * Unsigned saturating subtract on four halfword lanes: each result lane is
 * (fs lane - ft lane), clamped to 0 when the ft lane is the larger one.
 *
 * The previous implementation only clamped the upper bound, so a negative
 * difference was truncated on store (1 - 2 produced 0xffff) instead of
 * saturating to 0.  Lanes are extracted with shifts, so the result does not
 * depend on host byte order.
 */
uint64_t helper_psubush(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        unsigned int a = (fs >> shift) & 0xffff;
        unsigned int b = (ft >> shift) & 0xffff;
        unsigned int r = a > b ? a - b : 0;

        fd |= (uint64_t)r << shift;
    }
    return fd;
}
204 | ||
205 | uint64_t helper_psubb(uint64_t fs, uint64_t ft) | |
206 | { | |
207 | LMIValue vs, vt; | |
208 | unsigned int i; | |
209 | ||
210 | vs.d = fs; | |
211 | vt.d = ft; | |
212 | for (i = 0; i < 8; ++i) { | |
213 | vs.ub[i] -= vt.ub[i]; | |
214 | } | |
215 | return vs.d; | |
216 | } | |
217 | ||
218 | uint64_t helper_psubh(uint64_t fs, uint64_t ft) | |
219 | { | |
220 | LMIValue vs, vt; | |
221 | unsigned int i; | |
222 | ||
223 | vs.d = fs; | |
224 | vt.d = ft; | |
225 | for (i = 0; i < 4; ++i) { | |
226 | vs.uh[i] -= vt.uh[i]; | |
227 | } | |
228 | return vs.d; | |
229 | } | |
230 | ||
231 | uint64_t helper_psubw(uint64_t fs, uint64_t ft) | |
232 | { | |
233 | LMIValue vs, vt; | |
234 | unsigned int i; | |
235 | ||
236 | vs.d = fs; | |
237 | vt.d = ft; | |
238 | for (i = 0; i < 2; ++i) { | |
239 | vs.uw[i] -= vt.uw[i]; | |
240 | } | |
241 | return vs.d; | |
242 | } | |
243 | ||
244 | uint64_t helper_pshufh(uint64_t fs, uint64_t ft) | |
245 | { | |
246 | unsigned host = BYTE_ORDER_XOR(3); | |
247 | LMIValue vd, vs; | |
248 | unsigned i; | |
249 | ||
250 | vs.d = fs; | |
251 | vd.d = 0; | |
252 | for (i = 0; i < 4; i++, ft >>= 2) { | |
253 | vd.uh[i ^ host] = vs.uh[(ft & 3) ^ host]; | |
254 | } | |
255 | return vd.d; | |
256 | } | |
257 | ||
/*
 * Pack four signed words into four signed halfwords with signed saturation.
 * Result lanes 0 and 1 come from the low and high word of fs, lanes 2 and 3
 * from the low and high word of ft.
 *
 * Each clamped value is masked and widened to uint64_t before shifting.
 * The previous code shifted a signed 64-bit value, which is undefined when
 * a negative lane is moved into bits 48..63.
 */
uint64_t helper_packsswh(uint64_t fs, uint64_t ft)
{
    const uint64_t src[2] = { fs, ft };
    uint64_t fd = 0;
    unsigned int lane;

    for (lane = 0; lane < 4; ++lane) {
        int32_t w = (int32_t)(src[lane / 2] >> ((lane % 2) * 32));

        if (w < -0x8000) {
            w = -0x8000;
        } else if (w > 0x7fff) {
            w = 0x7fff;
        }
        fd |= (uint64_t)(w & 0xffff) << (lane * 16);
    }
    return fd;
}
281 | ||
/* Pack eight signed halfwords into eight bytes with signed saturation.
   Result bytes 0-3 come from the halfwords of fs, bytes 4-7 from those
   of ft.  */
uint64_t helper_packsshb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int lane;

    for (lane = 0; lane < 8; lane++) {
        uint64_t src = lane < 4 ? fs : ft;
        int16_t half = src >> ((lane & 3) * 16);

        half = SATSB(half);
        fd |= (uint64_t)(half & 0xff) << (lane * 8);
    }
    return fd;
}
300 | ||
/*
 * Pack eight signed halfwords into eight bytes with unsigned saturation:
 * values below 0 become 0 and values above 0xff become 0xff.  Result bytes
 * 0-3 come from the halfwords of fs, bytes 4-7 from those of ft.
 *
 * The previous implementation clamped only the upper bound, so a negative
 * halfword kept its low byte (e.g. -1 became 0xff) instead of saturating
 * to 0.
 */
uint64_t helper_packushb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int lane;

    for (lane = 0; lane < 8; lane++) {
        uint64_t src = lane < 4 ? fs : ft;
        int16_t half = (int16_t)(src >> ((lane & 3) * 16));
        unsigned int byte = half < 0 ? 0 : half > 0xff ? 0xff : half;

        fd |= (uint64_t)byte << (lane * 8);
    }
    return fd;
}
319 | ||
/* Combine the low words: the low word of fs stays in bits 0..31 and the
   low word of ft is placed in bits 32..63.  */
uint64_t helper_punpcklwd(uint64_t fs, uint64_t ft)
{
    const uint64_t low = (uint32_t)fs;
    const uint64_t high = ft << 32;

    return high | low;
}
324 | ||
/* Combine the high words: the high word of fs moves to bits 0..31 and the
   high word of ft stays in bits 32..63.  */
uint64_t helper_punpckhwd(uint64_t fs, uint64_t ft)
{
    const uint64_t low = fs >> 32;
    const uint64_t high = (ft >> 32) << 32;

    return high | low;
}
329 | ||
330 | uint64_t helper_punpcklhw(uint64_t fs, uint64_t ft) | |
331 | { | |
332 | unsigned host = BYTE_ORDER_XOR(3); | |
333 | LMIValue vd, vs, vt; | |
334 | ||
335 | vs.d = fs; | |
336 | vt.d = ft; | |
337 | vd.uh[0 ^ host] = vs.uh[0 ^ host]; | |
338 | vd.uh[1 ^ host] = vt.uh[0 ^ host]; | |
339 | vd.uh[2 ^ host] = vs.uh[1 ^ host]; | |
340 | vd.uh[3 ^ host] = vt.uh[1 ^ host]; | |
341 | ||
342 | return vd.d; | |
343 | } | |
344 | ||
345 | uint64_t helper_punpckhhw(uint64_t fs, uint64_t ft) | |
346 | { | |
347 | unsigned host = BYTE_ORDER_XOR(3); | |
348 | LMIValue vd, vs, vt; | |
349 | ||
350 | vs.d = fs; | |
351 | vt.d = ft; | |
352 | vd.uh[0 ^ host] = vs.uh[2 ^ host]; | |
353 | vd.uh[1 ^ host] = vt.uh[2 ^ host]; | |
354 | vd.uh[2 ^ host] = vs.uh[3 ^ host]; | |
355 | vd.uh[3 ^ host] = vt.uh[3 ^ host]; | |
356 | ||
357 | return vd.d; | |
358 | } | |
359 | ||
360 | uint64_t helper_punpcklbh(uint64_t fs, uint64_t ft) | |
361 | { | |
362 | unsigned host = BYTE_ORDER_XOR(7); | |
363 | LMIValue vd, vs, vt; | |
364 | ||
365 | vs.d = fs; | |
366 | vt.d = ft; | |
367 | vd.ub[0 ^ host] = vs.ub[0 ^ host]; | |
368 | vd.ub[1 ^ host] = vt.ub[0 ^ host]; | |
369 | vd.ub[2 ^ host] = vs.ub[1 ^ host]; | |
370 | vd.ub[3 ^ host] = vt.ub[1 ^ host]; | |
371 | vd.ub[4 ^ host] = vs.ub[2 ^ host]; | |
372 | vd.ub[5 ^ host] = vt.ub[2 ^ host]; | |
373 | vd.ub[6 ^ host] = vs.ub[3 ^ host]; | |
374 | vd.ub[7 ^ host] = vt.ub[3 ^ host]; | |
375 | ||
376 | return vd.d; | |
377 | } | |
378 | ||
379 | uint64_t helper_punpckhbh(uint64_t fs, uint64_t ft) | |
380 | { | |
381 | unsigned host = BYTE_ORDER_XOR(7); | |
382 | LMIValue vd, vs, vt; | |
383 | ||
384 | vs.d = fs; | |
385 | vt.d = ft; | |
386 | vd.ub[0 ^ host] = vs.ub[4 ^ host]; | |
387 | vd.ub[1 ^ host] = vt.ub[4 ^ host]; | |
388 | vd.ub[2 ^ host] = vs.ub[5 ^ host]; | |
389 | vd.ub[3 ^ host] = vt.ub[5 ^ host]; | |
390 | vd.ub[4 ^ host] = vs.ub[6 ^ host]; | |
391 | vd.ub[5 ^ host] = vt.ub[6 ^ host]; | |
392 | vd.ub[6 ^ host] = vs.ub[7 ^ host]; | |
393 | vd.ub[7 ^ host] = vt.ub[7 ^ host]; | |
394 | ||
395 | return vd.d; | |
396 | } | |
397 | ||
398 | uint64_t helper_pavgh(uint64_t fs, uint64_t ft) | |
399 | { | |
400 | LMIValue vs, vt; | |
401 | unsigned i; | |
402 | ||
403 | vs.d = fs; | |
404 | vt.d = ft; | |
405 | for (i = 0; i < 4; i++) { | |
406 | vs.uh[i] = (vs.uh[i] + vt.uh[i] + 1) >> 1; | |
407 | } | |
408 | return vs.d; | |
409 | } | |
410 | ||
411 | uint64_t helper_pavgb(uint64_t fs, uint64_t ft) | |
412 | { | |
413 | LMIValue vs, vt; | |
414 | unsigned i; | |
415 | ||
416 | vs.d = fs; | |
417 | vt.d = ft; | |
418 | for (i = 0; i < 8; i++) { | |
419 | vs.ub[i] = (vs.ub[i] + vt.ub[i] + 1) >> 1; | |
420 | } | |
421 | return vs.d; | |
422 | } | |
423 | ||
424 | uint64_t helper_pmaxsh(uint64_t fs, uint64_t ft) | |
425 | { | |
426 | LMIValue vs, vt; | |
427 | unsigned i; | |
428 | ||
429 | vs.d = fs; | |
430 | vt.d = ft; | |
431 | for (i = 0; i < 4; i++) { | |
432 | vs.sh[i] = (vs.sh[i] >= vt.sh[i] ? vs.sh[i] : vt.sh[i]); | |
433 | } | |
434 | return vs.d; | |
435 | } | |
436 | ||
437 | uint64_t helper_pminsh(uint64_t fs, uint64_t ft) | |
438 | { | |
439 | LMIValue vs, vt; | |
440 | unsigned i; | |
441 | ||
442 | vs.d = fs; | |
443 | vt.d = ft; | |
444 | for (i = 0; i < 4; i++) { | |
445 | vs.sh[i] = (vs.sh[i] <= vt.sh[i] ? vs.sh[i] : vt.sh[i]); | |
446 | } | |
447 | return vs.d; | |
448 | } | |
449 | ||
/*
 * Lane-wise maximum of the eight unsigned bytes of fs and ft.
 *
 * The previous loop stopped after four lanes, so half of the result bytes
 * were copied from fs unchanged.  All eight byte lanes are processed here,
 * extracted with shifts so the result does not depend on host byte order.
 */
uint64_t helper_pmaxub(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8) {
        uint64_t a = (fs >> shift) & 0xff;
        uint64_t b = (ft >> shift) & 0xff;

        fd |= (a >= b ? a : b) << shift;
    }
    return fd;
}
462 | ||
/*
 * Lane-wise minimum of the eight unsigned bytes of fs and ft.
 *
 * The previous loop stopped after four lanes, so half of the result bytes
 * were copied from fs unchanged.  All eight byte lanes are processed here,
 * extracted with shifts so the result does not depend on host byte order.
 */
uint64_t helper_pminub(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8) {
        uint64_t a = (fs >> shift) & 0xff;
        uint64_t b = (ft >> shift) & 0xff;

        fd |= (a <= b ? a : b) << shift;
    }
    return fd;
}
475 | ||
476 | uint64_t helper_pcmpeqw(uint64_t fs, uint64_t ft) | |
477 | { | |
478 | LMIValue vs, vt; | |
479 | unsigned i; | |
480 | ||
481 | vs.d = fs; | |
482 | vt.d = ft; | |
483 | for (i = 0; i < 2; i++) { | |
484 | vs.uw[i] = -(vs.uw[i] == vt.uw[i]); | |
485 | } | |
486 | return vs.d; | |
487 | } | |
488 | ||
489 | uint64_t helper_pcmpgtw(uint64_t fs, uint64_t ft) | |
490 | { | |
491 | LMIValue vs, vt; | |
492 | unsigned i; | |
493 | ||
494 | vs.d = fs; | |
495 | vt.d = ft; | |
496 | for (i = 0; i < 2; i++) { | |
497 | vs.uw[i] = -(vs.uw[i] > vt.uw[i]); | |
498 | } | |
499 | return vs.d; | |
500 | } | |
501 | ||
502 | uint64_t helper_pcmpeqh(uint64_t fs, uint64_t ft) | |
503 | { | |
504 | LMIValue vs, vt; | |
505 | unsigned i; | |
506 | ||
507 | vs.d = fs; | |
508 | vt.d = ft; | |
509 | for (i = 0; i < 4; i++) { | |
510 | vs.uh[i] = -(vs.uh[i] == vt.uh[i]); | |
511 | } | |
512 | return vs.d; | |
513 | } | |
514 | ||
515 | uint64_t helper_pcmpgth(uint64_t fs, uint64_t ft) | |
516 | { | |
517 | LMIValue vs, vt; | |
518 | unsigned i; | |
519 | ||
520 | vs.d = fs; | |
521 | vt.d = ft; | |
522 | for (i = 0; i < 4; i++) { | |
523 | vs.uh[i] = -(vs.uh[i] > vt.uh[i]); | |
524 | } | |
525 | return vs.d; | |
526 | } | |
527 | ||
528 | uint64_t helper_pcmpeqb(uint64_t fs, uint64_t ft) | |
529 | { | |
530 | LMIValue vs, vt; | |
531 | unsigned i; | |
532 | ||
533 | vs.d = fs; | |
534 | vt.d = ft; | |
535 | for (i = 0; i < 8; i++) { | |
536 | vs.ub[i] = -(vs.ub[i] == vt.ub[i]); | |
537 | } | |
538 | return vs.d; | |
539 | } | |
540 | ||
541 | uint64_t helper_pcmpgtb(uint64_t fs, uint64_t ft) | |
542 | { | |
543 | LMIValue vs, vt; | |
544 | unsigned i; | |
545 | ||
546 | vs.d = fs; | |
547 | vt.d = ft; | |
548 | for (i = 0; i < 8; i++) { | |
549 | vs.ub[i] = -(vs.ub[i] > vt.ub[i]); | |
550 | } | |
551 | return vs.d; | |
552 | } | |
553 | ||
554 | uint64_t helper_psllw(uint64_t fs, uint64_t ft) | |
555 | { | |
556 | LMIValue vs; | |
557 | unsigned i; | |
558 | ||
559 | ft &= 0x7f; | |
560 | if (ft > 31) { | |
561 | return 0; | |
562 | } | |
563 | vs.d = fs; | |
564 | for (i = 0; i < 2; ++i) { | |
565 | vs.uw[i] <<= ft; | |
566 | } | |
567 | return vs.d; | |
568 | } | |
569 | ||
570 | uint64_t helper_psrlw(uint64_t fs, uint64_t ft) | |
571 | { | |
572 | LMIValue vs; | |
573 | unsigned i; | |
574 | ||
575 | ft &= 0x7f; | |
576 | if (ft > 31) { | |
577 | return 0; | |
578 | } | |
579 | vs.d = fs; | |
580 | for (i = 0; i < 2; ++i) { | |
581 | vs.uw[i] >>= ft; | |
582 | } | |
583 | return vs.d; | |
584 | } | |
585 | ||
586 | uint64_t helper_psraw(uint64_t fs, uint64_t ft) | |
587 | { | |
588 | LMIValue vs; | |
589 | unsigned i; | |
590 | ||
591 | ft &= 0x7f; | |
592 | if (ft > 31) { | |
593 | ft = 31; | |
594 | } | |
595 | vs.d = fs; | |
596 | for (i = 0; i < 2; ++i) { | |
597 | vs.sw[i] >>= ft; | |
598 | } | |
599 | return vs.d; | |
600 | } | |
601 | ||
602 | uint64_t helper_psllh(uint64_t fs, uint64_t ft) | |
603 | { | |
604 | LMIValue vs; | |
605 | unsigned i; | |
606 | ||
607 | ft &= 0x7f; | |
608 | if (ft > 15) { | |
609 | return 0; | |
610 | } | |
611 | vs.d = fs; | |
612 | for (i = 0; i < 4; ++i) { | |
613 | vs.uh[i] <<= ft; | |
614 | } | |
615 | return vs.d; | |
616 | } | |
617 | ||
618 | uint64_t helper_psrlh(uint64_t fs, uint64_t ft) | |
619 | { | |
620 | LMIValue vs; | |
621 | unsigned i; | |
622 | ||
623 | ft &= 0x7f; | |
624 | if (ft > 15) { | |
625 | return 0; | |
626 | } | |
627 | vs.d = fs; | |
628 | for (i = 0; i < 4; ++i) { | |
629 | vs.uh[i] >>= ft; | |
630 | } | |
631 | return vs.d; | |
632 | } | |
633 | ||
634 | uint64_t helper_psrah(uint64_t fs, uint64_t ft) | |
635 | { | |
636 | LMIValue vs; | |
637 | unsigned i; | |
638 | ||
639 | ft &= 0x7f; | |
640 | if (ft > 15) { | |
641 | ft = 15; | |
642 | } | |
643 | vs.d = fs; | |
644 | for (i = 0; i < 4; ++i) { | |
645 | vs.sh[i] >>= ft; | |
646 | } | |
647 | return vs.d; | |
648 | } | |
649 | ||
650 | uint64_t helper_pmullh(uint64_t fs, uint64_t ft) | |
651 | { | |
652 | LMIValue vs, vt; | |
653 | unsigned i; | |
654 | ||
655 | vs.d = fs; | |
656 | vt.d = ft; | |
657 | for (i = 0; i < 4; ++i) { | |
658 | vs.sh[i] *= vt.sh[i]; | |
659 | } | |
660 | return vs.d; | |
661 | } | |
662 | ||
663 | uint64_t helper_pmulhh(uint64_t fs, uint64_t ft) | |
664 | { | |
665 | LMIValue vs, vt; | |
666 | unsigned i; | |
667 | ||
668 | vs.d = fs; | |
669 | vt.d = ft; | |
670 | for (i = 0; i < 4; ++i) { | |
671 | int32_t r = vs.sh[i] * vt.sh[i]; | |
672 | vs.sh[i] = r >> 16; | |
673 | } | |
674 | return vs.d; | |
675 | } | |
676 | ||
/*
 * Multiply the four unsigned halfword lanes and keep the high 16 bits of
 * each 32-bit product.
 *
 * The operands are widened to uint32_t before multiplying.  The previous
 * code multiplied two uint16_t values directly; these are promoted to
 * signed int, so products above INT_MAX (e.g. 0xffff * 0xffff) overflowed,
 * which is undefined behaviour.  Lanes are extracted with shifts, so the
 * result does not depend on host byte order.
 */
uint64_t helper_pmulhuh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        uint32_t a = (fs >> shift) & 0xffff;
        uint32_t b = (ft >> shift) & 0xffff;

        fd |= (uint64_t)((a * b) >> 16) << shift;
    }
    return fd;
}
690 | ||
691 | uint64_t helper_pmaddhw(uint64_t fs, uint64_t ft) | |
692 | { | |
693 | unsigned host = BYTE_ORDER_XOR(3); | |
694 | LMIValue vs, vt; | |
695 | uint32_t p0, p1; | |
696 | ||
697 | vs.d = fs; | |
698 | vt.d = ft; | |
699 | p0 = vs.sh[0 ^ host] * vt.sh[0 ^ host]; | |
700 | p0 += vs.sh[1 ^ host] * vt.sh[1 ^ host]; | |
701 | p1 = vs.sh[2 ^ host] * vt.sh[2 ^ host]; | |
702 | p1 += vs.sh[3 ^ host] * vt.sh[3 ^ host]; | |
703 | ||
704 | return ((uint64_t)p1 << 32) | p0; | |
705 | } | |
706 | ||
707 | uint64_t helper_pasubub(uint64_t fs, uint64_t ft) | |
708 | { | |
709 | LMIValue vs, vt; | |
710 | unsigned i; | |
711 | ||
712 | vs.d = fs; | |
713 | vt.d = ft; | |
714 | for (i = 0; i < 8; ++i) { | |
715 | int r = vs.ub[i] - vt.ub[i]; | |
716 | vs.ub[i] = (r < 0 ? -r : r); | |
717 | } | |
718 | return vs.d; | |
719 | } | |
720 | ||
/* Sum the eight bytes of fs and return the total masked to 16 bits.  */
uint64_t helper_biadd(uint64_t fs)
{
    unsigned total = 0;
    unsigned remaining = 8;

    while (remaining-- > 0) {
        total += fs & 0xff;
        fs >>= 8;
    }
    return total & 0xffff;
}
730 | ||
/* Gather the most significant bit of each of the eight bytes of fs into
   the low eight bits of the result; bit N comes from byte N.  */
uint64_t helper_pmovmskb(uint64_t fs)
{
    unsigned mask = 0;
    unsigned lane;

    for (lane = 0; lane < 8; lane++) {
        unsigned msb = (fs >> (lane * 8 + 7)) & 1;

        mask |= msb << lane;
    }
    return mask & 0xff;
}