/*
 * Simple C functions to supplement the C library
 *
 * Copyright (c) 2006 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/cutils.h"
#include "qemu/bswap.h"

/* vector definitions */

/* Deliberately declared but never defined: a surviving call to it means
 * ACCEL_BUFFER_ZERO was instantiated with an unsupported SIZE, turning
 * the mistake into a link-time failure instead of silent misbehavior.
 */
extern void link_error(void);

/*
 * Expand to a predicate named NAME that returns true iff all LEN bytes
 * at BUF are zero.  The buffer is consumed in SIZE-byte chunks: 4 or 8
 * vectors of type VECTYPE are OR-ed together and the accumulated vector
 * is tested with NONZERO.  Callers must guarantee that len is a
 * non-zero multiple of SIZE and that buf is aligned for VECTYPE.
 */
#define ACCEL_BUFFER_ZERO(NAME, SIZE, VECTYPE, NONZERO)         \
static bool NAME(const void *buf, size_t len)                   \
{                                                               \
    const void *limit = buf + len;                              \
    do {                                                        \
        const VECTYPE *vec = buf;                               \
        VECTYPE acc;                                            \
        if (SIZE == sizeof(VECTYPE) * 4) {                      \
            acc = (vec[0] | vec[1]) | (vec[2] | vec[3]);        \
        } else if (SIZE == sizeof(VECTYPE) * 8) {               \
            acc = vec[0] | vec[1];                              \
            acc |= vec[2] | vec[3];                             \
            acc |= vec[4] | vec[5];                             \
            acc |= vec[6] | vec[7];                             \
        } else {                                                \
            link_error();                                       \
        }                                                       \
        if (unlikely(NONZERO(acc))) {                           \
            return false;                                       \
        }                                                       \
        buf += SIZE;                                            \
    } while (buf < limit);                                      \
    return true;                                                \
}

/*
 * Portable integer fallback: returns true iff all len bytes at buf are
 * zero.  Needs no vector extensions; used when no accelerator applies.
 */
static bool
buffer_zero_int(const void *buf, size_t len)
{
    if (unlikely(len < 8)) {
        /* Tiny buffer: simply OR every byte into one accumulator. */
        const unsigned char *byte = buf;
        const unsigned char *limit = buf + len;
        unsigned char acc = 0;

        do {
            acc |= *byte++;
        } while (byte < limit);

        return acc == 0;
    } else {
        /* Load the possibly-unaligned first and last 8 bytes with the
           unaligned access helpers, and cover the aligned middle with
           64-bit words (the head/tail loads may overlap the middle). */
        uint64_t acc = ldq_he_p(buf);
        const uint64_t *word = (uint64_t *)(((uintptr_t)buf + 8) & -8);
        const uint64_t *limit = (uint64_t *)(((uintptr_t)buf + len) & -8);

        /* Main loop, unrolled by eight words: test the previous
           iteration's accumulator while the next lines are prefetched. */
        for (; word + 8 <= limit; word += 8) {
            __builtin_prefetch(word + 8);
            if (acc) {
                return false;
            }
            acc = word[0] | word[1] | word[2] | word[3]
                | word[4] | word[5] | word[6] | word[7];
        }
        /* Leftover aligned words. */
        while (word < limit) {
            acc |= *word++;
        }
        /* Unaligned tail. */
        acc |= ldq_he_p(buf + len - 8);

        return acc == 0;
    }
}

43ff5e01 97#if defined(CONFIG_AVX2_OPT) || (defined(CONFIG_CPUID_H) && defined(__SSE2__))
5e33a872 98#include <cpuid.h>
88ca8e80 99
5e33a872
RH
100/* Do not use push_options pragmas unnecessarily, because clang
101 * does not support them.
102 */
103#ifndef __SSE2__
104#pragma GCC push_options
105#pragma GCC target("sse2")
106#endif
107#include <emmintrin.h>
108#define SSE2_NONZERO(X) \
109 (_mm_movemask_epi8(_mm_cmpeq_epi8((X), _mm_setzero_si128())) != 0xFFFF)
110ACCEL_BUFFER_ZERO(buffer_zero_sse2, 64, __m128i, SSE2_NONZERO)
111#ifndef __SSE2__
112#pragma GCC pop_options
113#endif
88ca8e80 114
#ifdef CONFIG_AVX2_OPT
#pragma GCC push_options
#pragma GCC target("avx2")
#include <immintrin.h>
/* VPTEST sets ZF iff (X & X) == 0, i.e. the vector is all-zero. */
#define AVX2_NONZERO(X) !_mm256_testz_si256((X), (X))
ACCEL_BUFFER_ZERO(buffer_zero_avx2, 128, __m256i, AVX2_NONZERO)
#pragma GCC pop_options
#endif

124#define CACHE_AVX2 2
125#define CACHE_AVX1 4
126#define CACHE_SSE4 8
127#define CACHE_SSE2 16
88ca8e80 128
5e33a872 129static unsigned cpuid_cache;
88ca8e80 130
5e33a872 131static void __attribute__((constructor)) init_cpuid_cache(void)
88ca8e80 132{
5e33a872
RH
133 int max = __get_cpuid_max(0, NULL);
134 int a, b, c, d;
135 unsigned cache = 0;
88ca8e80 136
5e33a872
RH
137 if (max >= 1) {
138 __cpuid(1, a, b, c, d);
139 if (d & bit_SSE2) {
140 cache |= CACHE_SSE2;
141 }
142#ifdef CONFIG_AVX2_OPT
143 if (c & bit_SSE4_1) {
144 cache |= CACHE_SSE4;
88ca8e80 145 }
88ca8e80 146
5e33a872
RH
147 /* We must check that AVX is not just available, but usable. */
148 if ((c & bit_OSXSAVE) && (c & bit_AVX)) {
149 __asm("xgetbv" : "=a"(a), "=d"(d) : "c"(0));
150 if ((a & 6) == 6) {
151 cache |= CACHE_AVX1;
152 if (max >= 7) {
153 __cpuid_count(7, 0, a, b, c, d);
154 if (b & bit_AVX2) {
155 cache |= CACHE_AVX2;
156 }
157 }
158 }
88ca8e80 159 }
5e33a872 160#endif
88ca8e80 161 }
5e33a872 162 cpuid_cache = cache;
88ca8e80
RH
163}
164
5e33a872 165static bool select_accel_fn(const void *buf, size_t len)
88ca8e80 166{
5e33a872
RH
167 uintptr_t ibuf = (uintptr_t)buf;
168#ifdef CONFIG_AVX2_OPT
169 if (len % 128 == 0 && ibuf % 32 == 0 && (cpuid_cache & CACHE_AVX2)) {
170 return buffer_zero_avx2(buf, len);
88ca8e80 171 }
5e33a872
RH
172#endif
173 if (len % 64 == 0 && ibuf % 16 == 0 && (cpuid_cache & CACHE_SSE2)) {
174 return buffer_zero_sse2(buf, len);
175 }
176 return buffer_zero_int(buf, len);
88ca8e80
RH
177}
178
5e33a872
RH
179#else
180#define select_accel_fn buffer_zero_int
88ca8e80
RH
181#endif
182
/*
 * Checks if a buffer is all zeroes.
 *
 * Returns true iff every one of the len bytes at buf is zero; an
 * empty buffer is trivially all-zero.
 */
bool buffer_is_zero(const void *buf, size_t len)
{
    if (unlikely(len == 0)) {
        return true;
    }

    /* Use an optimized zero check if possible.  Note that this also
       includes a check for an unrolled loop over 64-bit integers. */
    return select_accel_fn(buf, len);
}