]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #include "include/int_types.h" | |
16 | #include "include/types.h" | |
17 | ||
18 | #include <unistd.h> | |
19 | #include <fcntl.h> | |
20 | #include <errno.h> | |
21 | #include <stdlib.h> | |
22 | #include <sys/types.h> | |
23 | #include <sys/stat.h> | |
24 | #include <sys/ioctl.h> | |
25 | #include "include/compat.h" | |
26 | #include "include/linux_fiemap.h" | |
27 | #include "include/color.h" | |
28 | #include "include/buffer.h" | |
11fdf7f2 | 29 | #include "include/ceph_assert.h" |
7c673cae FG |
30 | |
31 | #ifndef __CYGWIN__ | |
32 | #include "os/fs/btrfs_ioctl.h" | |
33 | #endif | |
34 | ||
35 | #include <iostream> | |
36 | #include <fstream> | |
37 | #include <sstream> | |
38 | ||
39 | #include "BtrfsFileStoreBackend.h" | |
40 | ||
41 | #include "common/errno.h" | |
42 | #include "common/config.h" | |
43 | ||
44 | #if defined(__linux__) | |
45 | ||
46 | #define dout_context cct() | |
47 | #define dout_subsys ceph_subsys_filestore | |
48 | #undef dout_prefix | |
49 | #define dout_prefix *_dout << "btrfsfilestorebackend(" << get_basedir_path() << ") " | |
50 | ||
f67539c2 TL |
51 | using std::cerr; |
52 | using std::list; | |
53 | using std::string; | |
54 | ||
7c673cae FG |
// Integer block-alignment helpers. `by` must be non-zero (it is used as a
// divisor), and both arguments may be evaluated more than once.
#define ALIGNED(x, by) (((x) % (by)) == 0)
#define ALIGN_DOWN(x, by) (((x) / (by)) * (by))
#define ALIGN_UP(x, by) (((x) % (by)) ? ((x) - ((x) % (by)) + (by)) : (x))
58 | ||
59 | BtrfsFileStoreBackend::BtrfsFileStoreBackend(FileStore *fs): | |
60 | GenericFileStoreBackend(fs), has_clone_range(false), | |
61 | has_snap_create(false), has_snap_destroy(false), | |
62 | has_snap_create_v2(false), has_wait_sync(false), stable_commits(false), | |
63 | m_filestore_btrfs_clone_range(cct()->_conf->filestore_btrfs_clone_range), | |
64 | m_filestore_btrfs_snap (cct()->_conf->filestore_btrfs_snap) { } | |
65 | ||
66 | int BtrfsFileStoreBackend::detect_features() | |
67 | { | |
68 | int r; | |
69 | ||
70 | r = GenericFileStoreBackend::detect_features(); | |
71 | if (r < 0) | |
72 | return r; | |
73 | ||
74 | // clone_range? | |
75 | if (m_filestore_btrfs_clone_range) { | |
91327a77 | 76 | int fd = ::openat(get_basedir_fd(), "clone_range_test", O_CREAT|O_WRONLY|O_CLOEXEC, 0600); |
7c673cae FG |
77 | if (fd >= 0) { |
78 | if (::unlinkat(get_basedir_fd(), "clone_range_test", 0) < 0) { | |
79 | r = -errno; | |
80 | dout(0) << "detect_feature: failed to unlink test file for CLONE_RANGE ioctl: " | |
81 | << cpp_strerror(r) << dendl; | |
82 | } | |
83 | btrfs_ioctl_clone_range_args clone_args; | |
84 | memset(&clone_args, 0, sizeof(clone_args)); | |
85 | clone_args.src_fd = -1; | |
86 | r = ::ioctl(fd, BTRFS_IOC_CLONE_RANGE, &clone_args); | |
87 | if (r < 0 && errno == EBADF) { | |
88 | dout(0) << "detect_feature: CLONE_RANGE ioctl is supported" << dendl; | |
89 | has_clone_range = true; | |
90 | } else { | |
91 | r = -errno; | |
92 | dout(0) << "detect_feature: CLONE_RANGE ioctl is NOT supported: " << cpp_strerror(r) << dendl; | |
93 | } | |
94 | TEMP_FAILURE_RETRY(::close(fd)); | |
95 | } else { | |
96 | r = -errno; | |
97 | dout(0) << "detect_feature: failed to create test file for CLONE_RANGE ioctl: " | |
98 | << cpp_strerror(r) << dendl; | |
99 | } | |
100 | } else { | |
101 | dout(0) << "detect_feature: CLONE_RANGE ioctl is DISABLED via 'filestore btrfs clone range' option" << dendl; | |
102 | } | |
103 | ||
104 | struct btrfs_ioctl_vol_args vol_args; | |
105 | memset(&vol_args, 0, sizeof(vol_args)); | |
106 | ||
107 | // create test source volume | |
108 | vol_args.fd = 0; | |
109 | strcpy(vol_args.name, "test_subvol"); | |
110 | r = ::ioctl(get_basedir_fd(), BTRFS_IOC_SUBVOL_CREATE, &vol_args); | |
111 | if (r != 0) { | |
112 | r = -errno; | |
113 | dout(0) << "detect_feature: failed to create simple subvolume " << vol_args.name << ": " << cpp_strerror(r) << dendl; | |
114 | } | |
91327a77 | 115 | int srcfd = ::openat(get_basedir_fd(), vol_args.name, O_RDONLY|O_CLOEXEC); |
7c673cae FG |
116 | if (srcfd < 0) { |
117 | r = -errno; | |
118 | dout(0) << "detect_feature: failed to open " << vol_args.name << ": " << cpp_strerror(r) << dendl; | |
119 | } | |
120 | ||
121 | // snap_create and snap_destroy? | |
122 | vol_args.fd = srcfd; | |
123 | strcpy(vol_args.name, "sync_snap_test"); | |
124 | r = ::ioctl(get_basedir_fd(), BTRFS_IOC_SNAP_CREATE, &vol_args); | |
125 | int err = errno; | |
126 | if (r == 0 || errno == EEXIST) { | |
127 | dout(0) << "detect_feature: SNAP_CREATE is supported" << dendl; | |
128 | has_snap_create = true; | |
129 | ||
130 | r = ::ioctl(get_basedir_fd(), BTRFS_IOC_SNAP_DESTROY, &vol_args); | |
131 | if (r == 0) { | |
132 | dout(0) << "detect_feature: SNAP_DESTROY is supported" << dendl; | |
133 | has_snap_destroy = true; | |
134 | } else { | |
135 | err = -errno; | |
136 | dout(0) << "detect_feature: SNAP_DESTROY failed: " << cpp_strerror(err) << dendl; | |
137 | ||
138 | if (err == -EPERM && getuid() != 0) { | |
139 | dout(0) << "detect_feature: failed with EPERM as non-root; remount with -o user_subvol_rm_allowed" << dendl; | |
140 | cerr << TEXT_YELLOW | |
141 | << "btrfs SNAP_DESTROY failed as non-root; remount with -o user_subvol_rm_allowed" | |
142 | << TEXT_NORMAL << std::endl; | |
143 | } else if (err == -EOPNOTSUPP) { | |
144 | derr << "btrfs SNAP_DESTROY ioctl not supported; you need a kernel newer than 2.6.32" << dendl; | |
145 | } | |
146 | } | |
147 | } else { | |
148 | dout(0) << "detect_feature: SNAP_CREATE failed: " << cpp_strerror(err) << dendl; | |
149 | } | |
150 | ||
151 | if (m_filestore_btrfs_snap) { | |
152 | if (has_snap_destroy) | |
153 | stable_commits = true; | |
154 | else | |
155 | dout(0) << "detect_feature: snaps enabled, but no SNAP_DESTROY ioctl; DISABLING" << dendl; | |
156 | } | |
157 | ||
158 | // start_sync? | |
159 | __u64 transid = 0; | |
160 | r = ::ioctl(get_basedir_fd(), BTRFS_IOC_START_SYNC, &transid); | |
161 | if (r < 0) { | |
162 | int err = errno; | |
163 | dout(0) << "detect_feature: START_SYNC got " << cpp_strerror(err) << dendl; | |
164 | } | |
165 | if (r == 0 && transid > 0) { | |
166 | dout(0) << "detect_feature: START_SYNC is supported (transid " << transid << ")" << dendl; | |
167 | ||
168 | // do we have wait_sync too? | |
169 | r = ::ioctl(get_basedir_fd(), BTRFS_IOC_WAIT_SYNC, &transid); | |
170 | if (r == 0 || errno == ERANGE) { | |
171 | dout(0) << "detect_feature: WAIT_SYNC is supported" << dendl; | |
172 | has_wait_sync = true; | |
173 | } else { | |
174 | int err = errno; | |
175 | dout(0) << "detect_feature: WAIT_SYNC is NOT supported: " << cpp_strerror(err) << dendl; | |
176 | } | |
177 | } else { | |
178 | int err = errno; | |
179 | dout(0) << "detect_feature: START_SYNC is NOT supported: " << cpp_strerror(err) << dendl; | |
180 | } | |
181 | ||
182 | if (has_wait_sync) { | |
183 | // async snap creation? | |
184 | struct btrfs_ioctl_vol_args_v2 async_args; | |
185 | memset(&async_args, 0, sizeof(async_args)); | |
186 | async_args.fd = srcfd; | |
187 | async_args.flags = BTRFS_SUBVOL_CREATE_ASYNC; | |
188 | strcpy(async_args.name, "async_snap_test"); | |
189 | ||
190 | // remove old one, first | |
191 | struct stat st; | |
192 | strcpy(vol_args.name, async_args.name); | |
193 | if (::fstatat(get_basedir_fd(), vol_args.name, &st, 0) == 0) { | |
194 | dout(0) << "detect_feature: removing old async_snap_test" << dendl; | |
195 | r = ::ioctl(get_basedir_fd(), BTRFS_IOC_SNAP_DESTROY, &vol_args); | |
196 | if (r != 0) { | |
197 | int err = errno; | |
198 | dout(0) << "detect_feature: failed to remove old async_snap_test: " << cpp_strerror(err) << dendl; | |
199 | } | |
200 | } | |
201 | ||
202 | r = ::ioctl(get_basedir_fd(), BTRFS_IOC_SNAP_CREATE_V2, &async_args); | |
203 | if (r == 0 || errno == EEXIST) { | |
204 | dout(0) << "detect_feature: SNAP_CREATE_V2 is supported" << dendl; | |
205 | has_snap_create_v2 = true; | |
206 | ||
207 | // clean up | |
208 | strcpy(vol_args.name, "async_snap_test"); | |
209 | r = ::ioctl(get_basedir_fd(), BTRFS_IOC_SNAP_DESTROY, &vol_args); | |
210 | if (r != 0) { | |
211 | int err = errno; | |
212 | dout(0) << "detect_feature: SNAP_DESTROY failed: " << cpp_strerror(err) << dendl; | |
213 | } | |
214 | } else { | |
215 | int err = errno; | |
216 | dout(0) << "detect_feature: SNAP_CREATE_V2 is NOT supported: " << cpp_strerror(err) << dendl; | |
217 | } | |
218 | } | |
219 | ||
220 | // clean up test subvol | |
221 | if (srcfd >= 0) | |
222 | TEMP_FAILURE_RETRY(::close(srcfd)); | |
223 | ||
224 | strcpy(vol_args.name, "test_subvol"); | |
225 | r = ::ioctl(get_basedir_fd(), BTRFS_IOC_SNAP_DESTROY, &vol_args); | |
226 | if (r < 0) { | |
227 | r = -errno; | |
228 | dout(0) << "detect_feature: failed to remove " << vol_args.name << ": " << cpp_strerror(r) << dendl; | |
229 | } | |
230 | ||
231 | if (m_filestore_btrfs_snap && !has_snap_create_v2) { | |
232 | dout(0) << "mount WARNING: btrfs snaps enabled, but no SNAP_CREATE_V2 ioctl (from kernel 2.6.37+)" << dendl; | |
233 | cerr << TEXT_YELLOW | |
234 | << " ** WARNING: 'filestore btrfs snap' is enabled (for safe transactions,\n" | |
235 | << " rollback), but btrfs does not support the SNAP_CREATE_V2 ioctl\n" | |
236 | << " (added in Linux 2.6.37). Expect slow btrfs sync/commit\n" | |
237 | << " performance.\n" | |
238 | << TEXT_NORMAL; | |
239 | } | |
240 | ||
241 | return 0; | |
242 | } | |
243 | ||
244 | bool BtrfsFileStoreBackend::can_checkpoint() | |
245 | { | |
246 | return stable_commits; | |
247 | } | |
248 | ||
249 | int BtrfsFileStoreBackend::create_current() | |
250 | { | |
251 | struct stat st; | |
252 | int ret = ::stat(get_current_path().c_str(), &st); | |
253 | if (ret == 0) { | |
254 | // current/ exists | |
255 | if (!S_ISDIR(st.st_mode)) { | |
256 | dout(0) << "create_current: current/ exists but is not a directory" << dendl; | |
257 | return -EINVAL; | |
258 | } | |
259 | ||
260 | struct stat basest; | |
261 | struct statfs currentfs; | |
262 | ret = ::fstat(get_basedir_fd(), &basest); | |
263 | if (ret < 0) { | |
264 | ret = -errno; | |
265 | dout(0) << "create_current: cannot fstat basedir " << cpp_strerror(ret) << dendl; | |
266 | return ret; | |
267 | } | |
268 | ret = ::statfs(get_current_path().c_str(), ¤tfs); | |
269 | if (ret < 0) { | |
270 | ret = -errno; | |
271 | dout(0) << "create_current: cannot statsf basedir " << cpp_strerror(ret) << dendl; | |
272 | return ret; | |
273 | } | |
274 | if (currentfs.f_type == BTRFS_SUPER_MAGIC && basest.st_dev != st.st_dev) { | |
275 | dout(2) << "create_current: current appears to be a btrfs subvolume" << dendl; | |
276 | stable_commits = true; | |
277 | } | |
278 | return 0; | |
279 | } | |
280 | ||
281 | struct btrfs_ioctl_vol_args volargs; | |
282 | memset(&volargs, 0, sizeof(volargs)); | |
283 | ||
284 | volargs.fd = 0; | |
285 | strcpy(volargs.name, "current"); | |
286 | if (::ioctl(get_basedir_fd(), BTRFS_IOC_SUBVOL_CREATE, (unsigned long int)&volargs) < 0) { | |
287 | ret = -errno; | |
288 | dout(0) << "create_current: BTRFS_IOC_SUBVOL_CREATE failed with error " | |
289 | << cpp_strerror(ret) << dendl; | |
290 | return ret; | |
291 | } | |
292 | ||
293 | dout(2) << "create_current: created btrfs subvol " << get_current_path() << dendl; | |
294 | if (::chmod(get_current_path().c_str(), 0755) < 0) { | |
295 | ret = -errno; | |
296 | dout(0) << "create_current: failed to chmod " << get_current_path() << " to 0755: " | |
297 | << cpp_strerror(ret) << dendl; | |
298 | return ret; | |
299 | } | |
300 | ||
301 | stable_commits = true; | |
302 | return 0; | |
303 | } | |
304 | ||
305 | int BtrfsFileStoreBackend::list_checkpoints(list<string>& ls) | |
306 | { | |
307 | int ret, err = 0; | |
308 | ||
309 | struct stat basest; | |
310 | ret = ::fstat(get_basedir_fd(), &basest); | |
311 | if (ret < 0) { | |
312 | ret = -errno; | |
313 | dout(0) << "list_checkpoints: cannot fstat basedir " << cpp_strerror(ret) << dendl; | |
314 | return ret; | |
315 | } | |
316 | ||
317 | // get snap list | |
318 | DIR *dir = ::opendir(get_basedir_path().c_str()); | |
319 | if (!dir) { | |
320 | ret = -errno; | |
321 | dout(0) << "list_checkpoints: opendir '" << get_basedir_path() << "' failed: " | |
322 | << cpp_strerror(ret) << dendl; | |
323 | return ret; | |
324 | } | |
325 | ||
326 | list<string> snaps; | |
327 | char path[PATH_MAX]; | |
328 | struct dirent *de; | |
b3b6e05e TL |
329 | while (true) { |
330 | errno = 0; | |
331 | de = ::readdir(dir); | |
332 | if (de == nullptr) { | |
333 | if (errno != 0) { | |
334 | err = -errno; | |
335 | dout(0) << "list_checkpoints: readdir '" << get_basedir_path() << "' failed: " | |
336 | << cpp_strerror(err) << dendl; | |
337 | } | |
338 | break; | |
339 | } | |
7c673cae FG |
340 | snprintf(path, sizeof(path), "%s/%s", get_basedir_path().c_str(), de->d_name); |
341 | ||
342 | struct stat st; | |
343 | ret = ::stat(path, &st); | |
344 | if (ret < 0) { | |
345 | err = -errno; | |
346 | dout(0) << "list_checkpoints: stat '" << path << "' failed: " | |
347 | << cpp_strerror(err) << dendl; | |
348 | break; | |
349 | } | |
350 | ||
351 | if (!S_ISDIR(st.st_mode)) | |
352 | continue; | |
353 | ||
354 | struct statfs fs; | |
355 | ret = ::statfs(path, &fs); | |
356 | if (ret < 0) { | |
357 | err = -errno; | |
358 | dout(0) << "list_checkpoints: statfs '" << path << "' failed: " | |
359 | << cpp_strerror(err) << dendl; | |
360 | break; | |
361 | } | |
362 | ||
363 | if (fs.f_type == BTRFS_SUPER_MAGIC && basest.st_dev != st.st_dev) | |
364 | snaps.push_back(string(de->d_name)); | |
365 | } | |
366 | ||
367 | if (::closedir(dir) < 0) { | |
368 | ret = -errno; | |
369 | dout(0) << "list_checkpoints: closedir failed: " << cpp_strerror(ret) << dendl; | |
370 | if (!err) | |
371 | err = ret; | |
372 | } | |
373 | ||
374 | if (err) | |
375 | return err; | |
376 | ||
377 | ls.swap(snaps); | |
378 | return 0; | |
379 | } | |
380 | ||
381 | int BtrfsFileStoreBackend::create_checkpoint(const string& name, uint64_t *transid) | |
382 | { | |
383 | dout(10) << "create_checkpoint: '" << name << "'" << dendl; | |
384 | if (has_snap_create_v2 && transid) { | |
385 | struct btrfs_ioctl_vol_args_v2 async_args; | |
386 | memset(&async_args, 0, sizeof(async_args)); | |
387 | async_args.fd = get_current_fd(); | |
388 | async_args.flags = BTRFS_SUBVOL_CREATE_ASYNC; | |
389 | ||
390 | size_t name_size = sizeof(async_args.name); | |
391 | strncpy(async_args.name, name.c_str(), name_size); | |
392 | async_args.name[name_size-1] = '\0'; | |
393 | ||
394 | int r = ::ioctl(get_basedir_fd(), BTRFS_IOC_SNAP_CREATE_V2, &async_args); | |
395 | if (r < 0) { | |
396 | r = -errno; | |
397 | dout(0) << "create_checkpoint: async snap create '" << name << "' got " << cpp_strerror(r) << dendl; | |
398 | return r; | |
399 | } | |
400 | dout(20) << "create_checkpoint: async snap create '" << name << "' transid " << async_args.transid << dendl; | |
401 | *transid = async_args.transid; | |
402 | } else { | |
403 | struct btrfs_ioctl_vol_args vol_args; | |
404 | memset(&vol_args, 0, sizeof(vol_args)); | |
405 | vol_args.fd = get_current_fd(); | |
406 | ||
407 | size_t name_size = sizeof(vol_args.name); | |
408 | strncpy(vol_args.name, name.c_str(), name_size); | |
409 | vol_args.name[name_size-1] = '\0'; | |
410 | ||
411 | int r = ::ioctl(get_basedir_fd(), BTRFS_IOC_SNAP_CREATE, &vol_args); | |
412 | if (r < 0) { | |
413 | r = -errno; | |
414 | dout(0) << "create_checkpoint: snap create '" << name << "' got " << cpp_strerror(r) << dendl; | |
415 | return r; | |
416 | } | |
417 | if (transid) | |
418 | *transid = 0; | |
419 | } | |
420 | return 0; | |
421 | } | |
422 | ||
423 | int BtrfsFileStoreBackend::sync_checkpoint(uint64_t transid) | |
424 | { | |
425 | // wait for commit | |
426 | dout(10) << "sync_checkpoint: transid " << transid << " to complete" << dendl; | |
427 | int ret = ::ioctl(get_op_fd(), BTRFS_IOC_WAIT_SYNC, &transid); | |
428 | if (ret < 0) { | |
429 | ret = -errno; | |
430 | dout(0) << "sync_checkpoint: ioctl WAIT_SYNC got " << cpp_strerror(ret) << dendl; | |
431 | return -errno; | |
432 | } | |
433 | dout(20) << "sync_checkpoint: done waiting for transid " << transid << dendl; | |
434 | return 0; | |
435 | } | |
436 | ||
437 | int BtrfsFileStoreBackend::rollback_to(const string& name) | |
438 | { | |
439 | dout(10) << "rollback_to: to '" << name << "'" << dendl; | |
440 | char s[PATH_MAX]; | |
441 | btrfs_ioctl_vol_args vol_args; | |
442 | ||
443 | memset(&vol_args, 0, sizeof(vol_args)); | |
444 | vol_args.fd = 0; | |
445 | strcpy(vol_args.name, "current"); | |
446 | ||
447 | int ret = ::ioctl(get_basedir_fd(), BTRFS_IOC_SNAP_DESTROY, &vol_args); | |
448 | if (ret && errno != ENOENT) { | |
449 | dout(0) << "rollback_to: error removing old current subvol: " << cpp_strerror(ret) << dendl; | |
450 | snprintf(s, sizeof(s), "%s/current.remove.me.%d", get_basedir_path().c_str(), rand()); | |
451 | if (::rename(get_current_path().c_str(), s)) { | |
452 | ret = -errno; | |
453 | dout(0) << "rollback_to: error renaming old current subvol: " | |
454 | << cpp_strerror(ret) << dendl; | |
455 | return ret; | |
456 | } | |
457 | } | |
458 | ||
459 | snprintf(s, sizeof(s), "%s/%s", get_basedir_path().c_str(), name.c_str()); | |
460 | ||
461 | // roll back | |
91327a77 | 462 | vol_args.fd = ::open(s, O_RDONLY|O_CLOEXEC); |
7c673cae FG |
463 | if (vol_args.fd < 0) { |
464 | ret = -errno; | |
465 | dout(0) << "rollback_to: error opening '" << s << "': " << cpp_strerror(ret) << dendl; | |
466 | return ret; | |
467 | } | |
468 | ret = ::ioctl(get_basedir_fd(), BTRFS_IOC_SNAP_CREATE, &vol_args); | |
469 | if (ret < 0 ) { | |
470 | ret = -errno; | |
471 | dout(0) << "rollback_to: ioctl SNAP_CREATE got " << cpp_strerror(ret) << dendl; | |
472 | } | |
473 | TEMP_FAILURE_RETRY(::close(vol_args.fd)); | |
474 | return ret; | |
475 | } | |
476 | ||
477 | int BtrfsFileStoreBackend::destroy_checkpoint(const string& name) | |
478 | { | |
479 | dout(10) << "destroy_checkpoint: '" << name << "'" << dendl; | |
480 | btrfs_ioctl_vol_args vol_args; | |
481 | memset(&vol_args, 0, sizeof(vol_args)); | |
482 | vol_args.fd = 0; | |
9f95a23c TL |
483 | strncpy(vol_args.name, name.c_str(), sizeof(vol_args.name) - 1); |
484 | vol_args.name[sizeof(vol_args.name) - 1] = '\0'; | |
7c673cae FG |
485 | |
486 | int ret = ::ioctl(get_basedir_fd(), BTRFS_IOC_SNAP_DESTROY, &vol_args); | |
487 | if (ret) { | |
488 | ret = -errno; | |
489 | dout(0) << "destroy_checkpoint: ioctl SNAP_DESTROY got " << cpp_strerror(ret) << dendl; | |
490 | return ret; | |
491 | } | |
492 | return 0; | |
493 | } | |
494 | ||
495 | int BtrfsFileStoreBackend::syncfs() | |
496 | { | |
497 | dout(15) << "syncfs" << dendl; | |
498 | // do a full btrfs commit | |
499 | int ret = ::ioctl(get_op_fd(), BTRFS_IOC_SYNC); | |
500 | if (ret < 0) { | |
501 | ret = -errno; | |
502 | dout(0) << "syncfs: btrfs IOC_SYNC got " << cpp_strerror(ret) << dendl; | |
503 | } | |
504 | return ret; | |
505 | } | |
506 | ||
507 | int BtrfsFileStoreBackend::clone_range(int from, int to, uint64_t srcoff, uint64_t len, uint64_t dstoff) | |
508 | { | |
509 | dout(20) << "clone_range: " << srcoff << "~" << len << " to " << dstoff << dendl; | |
510 | size_t blk_size = get_blksize(); | |
511 | if (!has_clone_range || | |
512 | srcoff % blk_size != dstoff % blk_size) { | |
513 | dout(20) << "clone_range: using copy" << dendl; | |
514 | return _copy_range(from, to, srcoff, len, dstoff); | |
515 | } | |
516 | ||
517 | int err = 0; | |
518 | int r = 0; | |
519 | ||
520 | uint64_t srcoffclone = ALIGN_UP(srcoff, blk_size); | |
521 | uint64_t dstoffclone = ALIGN_UP(dstoff, blk_size); | |
522 | if (srcoffclone >= srcoff + len) { | |
523 | dout(20) << "clone_range: using copy, extent too short to align srcoff" << dendl; | |
524 | return _copy_range(from, to, srcoff, len, dstoff); | |
525 | } | |
526 | ||
527 | uint64_t lenclone = len - (srcoffclone - srcoff); | |
528 | if (!ALIGNED(lenclone, blk_size)) { | |
529 | struct stat from_stat, to_stat; | |
530 | err = ::fstat(from, &from_stat); | |
531 | if (err) return -errno; | |
532 | err = ::fstat(to , &to_stat); | |
533 | if (err) return -errno; | |
534 | ||
535 | if (srcoff + len != (uint64_t)from_stat.st_size || | |
536 | dstoff + len < (uint64_t)to_stat.st_size) { | |
537 | // Not to the end of the file, need to align length as well | |
538 | lenclone = ALIGN_DOWN(lenclone, blk_size); | |
539 | } | |
540 | } | |
541 | if (lenclone == 0) { | |
542 | // too short | |
543 | return _copy_range(from, to, srcoff, len, dstoff); | |
544 | } | |
545 | ||
546 | dout(20) << "clone_range: cloning " << srcoffclone << "~" << lenclone | |
547 | << " to " << dstoffclone << " = " << r << dendl; | |
548 | btrfs_ioctl_clone_range_args a; | |
549 | a.src_fd = from; | |
550 | a.src_offset = srcoffclone; | |
551 | a.src_length = lenclone; | |
552 | a.dest_offset = dstoffclone; | |
553 | err = ::ioctl(to, BTRFS_IOC_CLONE_RANGE, &a); | |
554 | if (err >= 0) { | |
555 | r += err; | |
556 | } else if (errno == EINVAL) { | |
557 | // Still failed, might be compressed | |
558 | dout(20) << "clone_range: failed CLONE_RANGE call with -EINVAL, using copy" << dendl; | |
559 | return _copy_range(from, to, srcoff, len, dstoff); | |
560 | } else { | |
561 | return -errno; | |
562 | } | |
563 | ||
564 | // Take care any trimmed from front | |
565 | if (srcoffclone != srcoff) { | |
566 | err = _copy_range(from, to, srcoff, srcoffclone - srcoff, dstoff); | |
567 | if (err >= 0) { | |
568 | r += err; | |
569 | } else { | |
570 | return err; | |
571 | } | |
572 | } | |
573 | ||
574 | // Copy end | |
575 | if (srcoffclone + lenclone != srcoff + len) { | |
576 | err = _copy_range(from, to, | |
577 | srcoffclone + lenclone, | |
578 | (srcoff + len) - (srcoffclone + lenclone), | |
579 | dstoffclone + lenclone); | |
580 | if (err >= 0) { | |
581 | r += err; | |
582 | } else { | |
583 | return err; | |
584 | } | |
585 | } | |
586 | dout(20) << "clone_range: finished " << srcoff << "~" << len | |
587 | << " to " << dstoff << " = " << r << dendl; | |
588 | return r; | |
589 | } | |
590 | #endif |