/*
 * QEMU TCG support -- s390x vector string instruction support
 *
 * Copyright (C) 2019 Red Hat Inc
 *
 * David Hildenbrand <david@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 * Recovered from: git.proxmox.com, mirror_qemu.git,
 * target/s390x/tcg/vec_string_helper.c
 */
#include "qemu/osdep.h"
#include "cpu.h"
#include "s390x-internal.h"
#include "vec.h"
#include "tcg/tcg.h"
#include "tcg/tcg-gvec-desc.h"
#include "exec/helper-proto.h"
/*
 * Returns a bit set in the MSB of each element that is zero,
 * as defined by the mask.
 *
 * @a:    packed elements to inspect
 * @mask: for every element, all bits except the element's MSB
 *
 * Adding @mask to the masked value carries into an element's MSB exactly
 * when one of its lower bits is set (the sum never spills into the next
 * element). OR-ing in @a additionally flags elements whose only set bit is
 * the MSB. After OR-ing in @mask and inverting, only the MSBs of all-zero
 * elements remain set.
 */
static inline uint64_t zero_search(uint64_t a, uint64_t mask)
{
    return ~(((a & mask) + mask) | a | mask);
}
/*
 * Returns a bit set in the MSB of each element that is not zero,
 * as defined by the mask.
 *
 * @a:    packed elements to inspect
 * @mask: for every element, all bits except the element's MSB
 *
 * Same carry trick as zero_search(): the addition sets an element's MSB
 * when any of its lower bits is set, OR-ing in @a covers a set MSB, and
 * the final AND with ~@mask keeps nothing but the per-element MSBs.
 */
static inline uint64_t nonzero_search(uint64_t a, uint64_t mask)
{
    return (((a & mask) + mask) | a) & ~mask;
}
/*
 * Returns the byte offset for the first match, or 16 for no match.
 *
 * @c0: match bits for the first doubleword (byte offsets 0-7)
 * @c1: match bits for the second doubleword (byte offsets 8-15)
 *
 * The number of leading zero bits in front of the first match bit, divided
 * by 8, is the byte offset. The "16 for no match" case depends on clz64(0)
 * yielding 64, so that (64 + 64) >> 3 == 16.
 */
static inline int match_index(uint64_t c0, uint64_t c1)
{
    return (c0 ? clz64(c0) : clz64(c1) + 64) >> 3;
}
47 * Returns the number of bits composing one element.
49 static uint8_t get_element_bits(uint8_t es
)
51 return (1 << es
) * BITS_PER_BYTE
;
/*
 * Returns the bitmask for a single element.
 *
 * @es: element size, expressed as log2 of the size in bytes
 *
 * The mask is right-aligned: the low get_element_bits(es) bits are set.
 */
static uint64_t get_single_element_mask(uint8_t es)
{
    return -1ull >> (64 - get_element_bits(es));
}
/*
 * Returns the bitmask for a single element (excluding the MSB).
 *
 * @es: element size, expressed as log2 of the size in bytes
 *
 * One bit narrower than get_single_element_mask(): the low
 * get_element_bits(es) - 1 bits are set.
 */
static uint64_t get_single_element_lsbs_mask(uint8_t es)
{
    return -1ull >> (65 - get_element_bits(es));
}
/*
 * Returns the bitmasks for multiple elements (excluding the MSBs).
 *
 * @es: element size, expressed as log2 of the size in bytes
 *
 * The single-element mask is replicated with dup_const(); the result is the
 * mask argument expected by zero_search() and nonzero_search().
 */
static uint64_t get_element_lsbs_mask(uint8_t es)
{
    return dup_const(es, get_single_element_lsbs_mask(es));
}
/*
 * Common implementation behind the gvec_vfae* helpers: for every element of
 * @v2, determine whether it equals any element of @v3.
 *
 * @v1: destination vector
 * @v2: vector whose elements are looked up (and scanned for zero if @zs)
 * @v3: vector providing the elements to compare against
 * @in: invert the per-element comparison result
 * @rt: store a per-element mask in @v1 instead of a byte index
 * @zs: additionally search @v2 for the first zero element
 * @es: element size, expressed as log2 of the size in bytes
 *
 * Returns a value in 0..3 as annotated on the return statements; the
 * gvec_vfae_cc* helpers store it in env->cc_op.
 *
 * NOTE(review): the text this block was recovered from had lost several
 * lines: the declarations of first_equal and i, the zeroing of e0/e1, the
 * computation of t0/t1, and the if (in) / if (zs) / if (rt) guards. They
 * were restored from the surrounding code and comments - confirm against
 * the upstream QEMU file.
 */
static int vfae(void *v1, const void *v2, const void *v3, bool in,
                bool rt, bool zs, uint8_t es)
{
    const uint64_t mask = get_element_lsbs_mask(es);
    const int bits = get_element_bits(es);
    uint64_t a0, a1, b0, b1, e0, e1, t0, t1, z0, z1;
    uint64_t first_zero = 16;
    uint64_t first_equal;
    int i;

    a0 = s390_vec_read_element64(v2, 0);
    a1 = s390_vec_read_element64(v2, 1);
    b0 = s390_vec_read_element64(v3, 0);
    b1 = s390_vec_read_element64(v3, 1);
    e0 = 0;
    e1 = 0;
    /* compare against equality with every other element */
    for (i = 0; i < 64; i += bits) {
        /*
         * Rotating both halves of v3 by one element per iteration lines
         * every element of v3 up with every element position of v2 once.
         */
        t0 = rol64(b0, i);
        t1 = rol64(b1, i);
        e0 |= zero_search(a0 ^ t0, mask);
        e0 |= zero_search(a0 ^ t1, mask);
        e1 |= zero_search(a1 ^ t0, mask);
        e1 |= zero_search(a1 ^ t1, mask);
    }
    /* invert the result if requested - invert only the MSBs */
    if (in) {
        e0 = ~e0 & ~mask;
        e1 = ~e1 & ~mask;
    }
    first_equal = match_index(e0, e1);

    if (zs) {
        z0 = zero_search(a0, mask);
        z1 = zero_search(a1, mask);
        first_zero = match_index(z0, z1);
    }

    if (rt) {
        /*
         * Move each element's match bit from the MSB to the LSB and
         * multiply by the element mask to widen it to the full element.
         */
        e0 = (e0 >> (bits - 1)) * get_single_element_mask(es);
        e1 = (e1 >> (bits - 1)) * get_single_element_mask(es);
        s390_vec_write_element64(v1, 0, e0);
        s390_vec_write_element64(v1, 1, e1);
    } else {
        s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero));
        s390_vec_write_element64(v1, 1, 0);
    }

    if (first_zero == 16 && first_equal == 16) {
        return 3; /* no match */
    } else if (first_zero == 16) {
        return 1; /* matching elements, no match for zero */
    } else if (first_equal < first_zero) {
        return 2; /* matching elements before match for zero */
    }
    return 0; /* match for zero */
}
136 #define DEF_VFAE_HELPER(BITS) \
137 void HELPER(gvec_vfae##BITS)(void *v1, const void *v2, const void *v3, \
140 const bool in = extract32(simd_data(desc), 3, 1); \
141 const bool rt = extract32(simd_data(desc), 2, 1); \
142 const bool zs = extract32(simd_data(desc), 1, 1); \
144 vfae(v1, v2, v3, in, rt, zs, MO_##BITS); \
150 #define DEF_VFAE_CC_HELPER(BITS) \
151 void HELPER(gvec_vfae_cc##BITS)(void *v1, const void *v2, const void *v3, \
152 CPUS390XState *env, uint32_t desc) \
154 const bool in = extract32(simd_data(desc), 3, 1); \
155 const bool rt = extract32(simd_data(desc), 2, 1); \
156 const bool zs = extract32(simd_data(desc), 1, 1); \
158 env->cc_op = vfae(v1, v2, v3, in, rt, zs, MO_##BITS); \
160 DEF_VFAE_CC_HELPER(8)
161 DEF_VFAE_CC_HELPER(16)
162 DEF_VFAE_CC_HELPER(32)
/*
 * Common implementation behind the gvec_vfee* helpers: find the first
 * element position at which @v2 and @v3 hold equal elements.
 *
 * @v1: destination vector; doubleword 0 receives the smaller of the first
 *      equal and first zero byte offsets (16 if neither), doubleword 1 is
 *      cleared
 * @v2: first operand (also scanned for zero if @zs)
 * @v3: second operand
 * @zs: additionally search @v2 for the first zero element
 * @es: element size, expressed as log2 of the size in bytes
 *
 * Returns a value in 0..3 as annotated on the return statements; the
 * gvec_vfee_cc* helpers store it in env->cc_op.
 *
 * NOTE(review): the if (zs) guard and the braces were missing from the
 * recovered text and were restored from context (zs would otherwise be
 * unused and the initial first_zero dead) - confirm against upstream.
 */
static int vfee(void *v1, const void *v2, const void *v3, bool zs, uint8_t es)
{
    const uint64_t mask = get_element_lsbs_mask(es);
    uint64_t a0, a1, b0, b1, e0, e1, z0, z1;
    uint64_t first_zero = 16;
    uint64_t first_equal;

    a0 = s390_vec_read_element64(v2, 0);
    a1 = s390_vec_read_element64(v2, 1);
    b0 = s390_vec_read_element64(v3, 0);
    b1 = s390_vec_read_element64(v3, 1);
    /* equal elements XOR to zero */
    e0 = zero_search(a0 ^ b0, mask);
    e1 = zero_search(a1 ^ b1, mask);
    first_equal = match_index(e0, e1);

    if (zs) {
        z0 = zero_search(a0, mask);
        z1 = zero_search(a1, mask);
        first_zero = match_index(z0, z1);
    }

    s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero));
    s390_vec_write_element64(v1, 1, 0);
    if (first_zero == 16 && first_equal == 16) {
        return 3; /* no match */
    } else if (first_zero == 16) {
        return 1; /* matching elements, no match for zero */
    } else if (first_equal < first_zero) {
        return 2; /* matching elements before match for zero */
    }
    return 0; /* match for zero */
}

197 #define DEF_VFEE_HELPER(BITS) \
198 void HELPER(gvec_vfee##BITS)(void *v1, const void *v2, const void *v3, \
201 const bool zs = extract32(simd_data(desc), 1, 1); \
203 vfee(v1, v2, v3, zs, MO_##BITS); \
209 #define DEF_VFEE_CC_HELPER(BITS) \
210 void HELPER(gvec_vfee_cc##BITS)(void *v1, const void *v2, const void *v3, \
211 CPUS390XState *env, uint32_t desc) \
213 const bool zs = extract32(simd_data(desc), 1, 1); \
215 env->cc_op = vfee(v1, v2, v3, zs, MO_##BITS); \
217 DEF_VFEE_CC_HELPER(8)
218 DEF_VFEE_CC_HELPER(16)
219 DEF_VFEE_CC_HELPER(32)
/*
 * Common implementation behind the gvec_vfene* helpers: find the first
 * element position at which @v2 and @v3 hold different elements.
 *
 * @v1: destination vector; doubleword 0 receives the smaller of the first
 *      unequal and first zero byte offsets (16 if neither), doubleword 1 is
 *      cleared
 * @v2: first operand (also scanned for zero if @zs)
 * @v3: second operand
 * @zs: additionally search @v2 for the first zero element
 * @es: element size, expressed as log2 of the size in bytes
 *
 * Returns 3 if neither an unequal nor a zero element was found, 0 if a zero
 * element precedes the first unequal element, otherwise 1 if the differing
 * element of @v2 is smaller than that of @v3 and 2 if it is not. The
 * gvec_vfene_cc* helpers store the value in env->cc_op.
 *
 * NOTE(review): the "smaller = a < b" assignment, the if (zs) guard, the
 * "return 3" / "return 0" statements and the braces were missing from the
 * recovered text and were restored from context - confirm against upstream.
 */
static int vfene(void *v1, const void *v2, const void *v3, bool zs, uint8_t es)
{
    const uint64_t mask = get_element_lsbs_mask(es);
    uint64_t a0, a1, b0, b1, e0, e1, z0, z1;
    uint64_t first_zero = 16;
    uint64_t first_inequal;
    bool smaller = false;

    a0 = s390_vec_read_element64(v2, 0);
    a1 = s390_vec_read_element64(v2, 1);
    b0 = s390_vec_read_element64(v3, 0);
    b1 = s390_vec_read_element64(v3, 1);
    /* differing elements XOR to a non-zero value */
    e0 = nonzero_search(a0 ^ b0, mask);
    e1 = nonzero_search(a1 ^ b1, mask);
    first_inequal = match_index(e0, e1);

    /* identify the smaller element */
    if (first_inequal < 16) {
        /* convert the byte offset into an element number */
        uint8_t enr = first_inequal / (1 << es);
        uint32_t a = s390_vec_read_element(v2, enr, es);
        uint32_t b = s390_vec_read_element(v3, enr, es);

        smaller = a < b;
    }

    if (zs) {
        z0 = zero_search(a0, mask);
        z1 = zero_search(a1, mask);
        first_zero = match_index(z0, z1);
    }

    s390_vec_write_element64(v1, 0, MIN(first_inequal, first_zero));
    s390_vec_write_element64(v1, 1, 0);
    if (first_zero == 16 && first_inequal == 16) {
        return 3;
    } else if (first_zero < first_inequal) {
        return 0;
    }
    return smaller ? 1 : 2;
}

262 #define DEF_VFENE_HELPER(BITS) \
263 void HELPER(gvec_vfene##BITS)(void *v1, const void *v2, const void *v3, \
266 const bool zs = extract32(simd_data(desc), 1, 1); \
268 vfene(v1, v2, v3, zs, MO_##BITS); \
274 #define DEF_VFENE_CC_HELPER(BITS) \
275 void HELPER(gvec_vfene_cc##BITS)(void *v1, const void *v2, const void *v3, \
276 CPUS390XState *env, uint32_t desc) \
278 const bool zs = extract32(simd_data(desc), 1, 1); \
280 env->cc_op = vfene(v1, v2, v3, zs, MO_##BITS); \
282 DEF_VFENE_CC_HELPER(8)
283 DEF_VFENE_CC_HELPER(16)
284 DEF_VFENE_CC_HELPER(32)
/*
 * Common implementation behind the gvec_vistr* helpers: copy @v2 to @v1,
 * clearing the first zero element and everything that follows it.
 *
 * @v1: destination vector
 * @v2: source vector
 * @es: element size, expressed as log2 of the size in bytes
 *
 * Returns 0 if a zero element was found and 3 otherwise; the
 * gvec_vistr_cc* helpers store the value in env->cc_op.
 *
 * NOTE(review): the declarations of z and cc, the if/else structure, the
 * "a1 = 0" / "cc = 0" assignments and the final return were missing from
 * the recovered text and were restored from context - confirm against
 * upstream.
 */
static int vistr(void *v1, const void *v2, uint8_t es)
{
    const uint64_t mask = get_element_lsbs_mask(es);
    uint64_t a0 = s390_vec_read_element64(v2, 0);
    uint64_t a1 = s390_vec_read_element64(v2, 1);
    uint64_t z;
    int cc = 3;

    z = zero_search(a0, mask);
    if (z) {
        /*
         * clz64(z) is the bit position where the first zero element
         * starts; keep only the bits in front of it.
         */
        a0 &= ~(-1ull >> clz64(z));
        a1 = 0;
        cc = 0;
    } else {
        z = zero_search(a1, mask);
        if (z) {
            a1 &= ~(-1ull >> clz64(z));
            cc = 0;
        }
    }

    s390_vec_write_element64(v1, 0, a0);
    s390_vec_write_element64(v1, 1, a1);
    return cc;
}

312 #define DEF_VISTR_HELPER(BITS) \
313 void HELPER(gvec_vistr##BITS)(void *v1, const void *v2, uint32_t desc) \
315 vistr(v1, v2, MO_##BITS); \
321 #define DEF_VISTR_CC_HELPER(BITS) \
322 void HELPER(gvec_vistr_cc##BITS)(void *v1, const void *v2, CPUS390XState *env, \
325 env->cc_op = vistr(v1, v2, MO_##BITS); \
327 DEF_VISTR_CC_HELPER(8)
328 DEF_VISTR_CC_HELPER(16)
329 DEF_VISTR_CC_HELPER(32)
/*
 * Evaluates one range comparison of vstrc().
 *
 * @data: element taken from the string being searched
 * @l:    element to compare @data against
 * @c:    control byte; bit 7 selects "equal", bit 6 "lower" and bit 5
 *        "higher" (bit numbers as used by extract32())
 *
 * Returns the control flag that corresponds to how @data relates to @l.
 *
 * NOTE(review): the "data < l" branch and the three return statements were
 * missing from the recovered text and were restored from the flag names and
 * the surviving "else if (data > l)" - confirm against upstream.
 */
static bool element_compare(uint32_t data, uint32_t l, uint8_t c)
{
    const bool equal = extract32(c, 7, 1);
    const bool lower = extract32(c, 6, 1);
    const bool higher = extract32(c, 5, 1);

    if (data < l) {
        return lower;
    } else if (data > l) {
        return higher;
    }
    return equal;
}

345 static int vstrc(void *v1
, const void *v2
, const void *v3
, const void *v4
,
346 bool in
, bool rt
, bool zs
, uint8_t es
)
348 const uint64_t mask
= get_element_lsbs_mask(es
);
349 uint64_t a0
= s390_vec_read_element64(v2
, 0);
350 uint64_t a1
= s390_vec_read_element64(v2
, 1);
351 int first_zero
= 16, first_match
= 16;
352 S390Vector rt_result
= {};
357 z0
= zero_search(a0
, mask
);
358 z1
= zero_search(a1
, mask
);
359 first_zero
= match_index(z0
, z1
);
362 for (i
= 0; i
< 16 / (1 << es
); i
++) {
363 const uint32_t data
= s390_vec_read_element(v2
, i
, es
);
364 const int cur_byte
= i
* (1 << es
);
365 bool any_match
= false;
367 /* if we don't need a bit vector, we can stop early */
368 if (cur_byte
== first_zero
&& !rt
) {
372 for (j
= 0; j
< 16 / (1 << es
); j
+= 2) {
373 const uint32_t l1
= s390_vec_read_element(v3
, j
, es
);
374 const uint32_t l2
= s390_vec_read_element(v3
, j
+ 1, es
);
375 /* we are only interested in the highest byte of each element */
376 const uint8_t c1
= s390_vec_read_element8(v4
, j
* (1 << es
));
377 const uint8_t c2
= s390_vec_read_element8(v4
, (j
+ 1) * (1 << es
));
379 if (element_compare(data
, l1
, c1
) &&
380 element_compare(data
, l2
, c2
)) {
385 /* invert the result if requested */
386 any_match
= in
^ any_match
;
389 /* indicate bit vector if requested */
391 const uint64_t val
= -1ull;
393 first_match
= MIN(cur_byte
, first_match
);
394 s390_vec_write_element(&rt_result
, i
, es
, val
);
396 /* stop on the first match */
397 first_match
= cur_byte
;
404 *(S390Vector
*)v1
= rt_result
;
406 s390_vec_write_element64(v1
, 0, MIN(first_match
, first_zero
));
407 s390_vec_write_element64(v1
, 1, 0);
410 if (first_zero
== 16 && first_match
== 16) {
411 return 3; /* no match */
412 } else if (first_zero
== 16) {
413 return 1; /* matching elements, no match for zero */
414 } else if (first_match
< first_zero
) {
415 return 2; /* matching elements before match for zero */
417 return 0; /* match for zero */
420 #define DEF_VSTRC_HELPER(BITS) \
421 void HELPER(gvec_vstrc##BITS)(void *v1, const void *v2, const void *v3, \
422 const void *v4, uint32_t desc) \
424 const bool in = extract32(simd_data(desc), 3, 1); \
425 const bool zs = extract32(simd_data(desc), 1, 1); \
427 vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS); \
433 #define DEF_VSTRC_RT_HELPER(BITS) \
434 void HELPER(gvec_vstrc_rt##BITS)(void *v1, const void *v2, const void *v3, \
435 const void *v4, uint32_t desc) \
437 const bool in = extract32(simd_data(desc), 3, 1); \
438 const bool zs = extract32(simd_data(desc), 1, 1); \
440 vstrc(v1, v2, v3, v4, in, 1, zs, MO_##BITS); \
442 DEF_VSTRC_RT_HELPER(8)
443 DEF_VSTRC_RT_HELPER(16)
444 DEF_VSTRC_RT_HELPER(32)
446 #define DEF_VSTRC_CC_HELPER(BITS) \
447 void HELPER(gvec_vstrc_cc##BITS)(void *v1, const void *v2, const void *v3, \
448 const void *v4, CPUS390XState *env, \
451 const bool in = extract32(simd_data(desc), 3, 1); \
452 const bool zs = extract32(simd_data(desc), 1, 1); \
454 env->cc_op = vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS); \
456 DEF_VSTRC_CC_HELPER(8)
457 DEF_VSTRC_CC_HELPER(16)
458 DEF_VSTRC_CC_HELPER(32)
460 #define DEF_VSTRC_CC_RT_HELPER(BITS) \
461 void HELPER(gvec_vstrc_cc_rt##BITS)(void *v1, const void *v2, const void *v3, \
462 const void *v4, CPUS390XState *env, \
465 const bool in = extract32(simd_data(desc), 3, 1); \
466 const bool zs = extract32(simd_data(desc), 1, 1); \
468 env->cc_op = vstrc(v1, v2, v3, v4, in, 1, zs, MO_##BITS); \
470 DEF_VSTRC_CC_RT_HELPER(8)
471 DEF_VSTRC_CC_RT_HELPER(16)
472 DEF_VSTRC_CC_RT_HELPER(32)