/*
 * Ceph - scalable distributed file system
 *
 * Copyright (C) 2014 CERN (Switzerland)
 *
 * Author: Andreas-Joachim Peters <Andreas.Joachim.Peters@cern.ch>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 */
13 // -----------------------------------------------------------------------------
#include "xor_op.h"

#include <assert.h>
#include <stdbool.h>
#include <string.h>

#include "arch/intel.h"
18 // -----------------------------------------------------------------------------
21 // -----------------------------------------------------------------------------
24 // -----------------------------------------------------------------------------
25 byte_xor(unsigned char* cw
, unsigned char* dw
, unsigned char* ew
)
26 // -----------------------------------------------------------------------------
32 // -----------------------------------------------------------------------------
35 // -----------------------------------------------------------------------------
36 vector_xor(vector_op_t
* cw
,
39 // -----------------------------------------------------------------------------
41 assert(is_aligned(cw
, EC_ISA_VECTOR_OP_WORDSIZE
));
42 assert(is_aligned(dw
, EC_ISA_VECTOR_OP_WORDSIZE
));
43 assert(is_aligned(ew
, EC_ISA_VECTOR_OP_WORDSIZE
));
50 // -----------------------------------------------------------------------------
53 // -----------------------------------------------------------------------------
54 region_xor(unsigned char** src
,
55 unsigned char* parity
,
70 // just copy source to parity
71 memcpy(parity
, src
[0], size
);
75 unsigned size_left
= size
;
77 // ----------------------------------------------------------
78 // region or vector XOR operations require aligned addresses
79 // ----------------------------------------------------------
81 bool src_aligned
= true;
82 for (int i
= 0; i
< src_size
; i
++) {
83 src_aligned
&= is_aligned(src
[i
], EC_ISA_VECTOR_OP_WORDSIZE
);
87 is_aligned(parity
, EC_ISA_VECTOR_OP_WORDSIZE
)) {
90 if (ceph_arch_intel_sse2
) {
91 // -----------------------------
92 // use SSE2 region xor function
93 // -----------------------------
94 unsigned region_size
=
95 (size
/ EC_ISA_VECTOR_SSE2_WORDSIZE
) * EC_ISA_VECTOR_SSE2_WORDSIZE
;
97 size_left
-= region_size
;
99 region_sse2_xor((char**) src
, (char*) parity
, src_size
, region_size
);
103 // --------------------------------------------
104 // use region xor based on vector xor operation
105 // --------------------------------------------
106 unsigned vector_words
= size
/ EC_ISA_VECTOR_OP_WORDSIZE
;
107 unsigned vector_size
= vector_words
* EC_ISA_VECTOR_OP_WORDSIZE
;
108 memcpy(parity
, src
[0], vector_size
);
110 size_left
-= vector_size
;
111 vector_op_t
* p_vec
= (vector_op_t
*) parity
;
112 for (int i
= 1; i
< src_size
; i
++) {
113 vector_op_t
* s_vec
= (vector_op_t
*) src
[i
];
114 vector_op_t
* e_vec
= s_vec
+ vector_words
;
115 vector_xor(s_vec
, p_vec
, e_vec
);
121 // --------------------------------------------------
122 // xor the not aligned part with byte-wise region xor
123 // --------------------------------------------------
124 memcpy(parity
+ size
- size_left
, src
[0] + size
- size_left
, size_left
);
125 for (int i
= 1; i
< src_size
; i
++) {
126 byte_xor(src
[i
] + size
- size_left
, parity
+ size
- size_left
, src
[i
] + size
);
131 // -----------------------------------------------------------------------------
134 // -----------------------------------------------------------------------------
135 region_sse2_xor(char** src
,
139 // -----------------------------------------------------------------------------
142 assert(!(size
% EC_ISA_VECTOR_SSE2_WORDSIZE
));
146 unsigned char* vbuf
[256];
148 for (int v
= 0; v
< src_size
; v
++) {
149 vbuf
[v
] = (unsigned char*) src
[v
];
153 p
= (unsigned char*) parity
;
155 for (i
= 0; i
< size
; i
+= EC_ISA_VECTOR_SSE2_WORDSIZE
) {
156 asm volatile("movdqa %0,%%xmm0" : : "m" (vbuf
[0][i
]));
157 asm volatile("movdqa %0,%%xmm1" : : "m" (vbuf
[0][i
+ 16]));
158 asm volatile("movdqa %0,%%xmm2" : : "m" (vbuf
[0][i
+ 32]));
159 asm volatile("movdqa %0,%%xmm3" : : "m" (vbuf
[0][i
+ 48]));
161 for (d
= 1; d
< l
; d
++) {
162 asm volatile("movdqa %0,%%xmm4" : : "m" (vbuf
[d
][i
]));
163 asm volatile("movdqa %0,%%xmm5" : : "m" (vbuf
[d
][i
+ 16]));
164 asm volatile("movdqa %0,%%xmm6" : : "m" (vbuf
[d
][i
+ 32]));
165 asm volatile("movdqa %0,%%xmm7" : : "m" (vbuf
[d
][i
+ 48]));
166 asm volatile("pxor %xmm4,%xmm0");
167 asm volatile("pxor %xmm5,%xmm1");
168 asm volatile("pxor %xmm6,%xmm2");
169 asm volatile("pxor %xmm7,%xmm3");
171 asm volatile("movntdq %%xmm0,%0" : "=m" (p
[i
]));
172 asm volatile("movntdq %%xmm1,%0" : "=m" (p
[i
+ 16]));
173 asm volatile("movntdq %%xmm2,%0" : "=m" (p
[i
+ 32]));
174 asm volatile("movntdq %%xmm3,%0" : "=m" (p
[i
+ 48]));
177 asm volatile("sfence" : : : "memory");