]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #include "include/int_types.h" | |
16 | #include "include/types.h" | |
17 | ||
18 | #include <unistd.h> | |
19 | #include <fcntl.h> | |
20 | #include <errno.h> | |
21 | #include <stdlib.h> | |
22 | #include <sys/types.h> | |
23 | #include <sys/stat.h> | |
24 | #include <sys/ioctl.h> | |
25 | #include "include/compat.h" | |
26 | #include "include/linux_fiemap.h" | |
27 | #include "include/color.h" | |
28 | #include "include/buffer.h" | |
11fdf7f2 | 29 | #include "include/ceph_assert.h" |
7c673cae FG |
30 | |
31 | #ifndef __CYGWIN__ | |
32 | #include "os/fs/btrfs_ioctl.h" | |
33 | #endif | |
34 | ||
35 | #include <iostream> | |
36 | #include <fstream> | |
37 | #include <sstream> | |
38 | ||
39 | #include "BtrfsFileStoreBackend.h" | |
40 | ||
41 | #include "common/errno.h" | |
42 | #include "common/config.h" | |
43 | ||
44 | #if defined(__linux__) | |
45 | ||
46 | #define dout_context cct() | |
47 | #define dout_subsys ceph_subsys_filestore | |
48 | #undef dout_prefix | |
49 | #define dout_prefix *_dout << "btrfsfilestorebackend(" << get_basedir_path() << ") " | |
50 | ||
f67539c2 TL |
51 | using std::cerr; |
52 | using std::list; | |
53 | using std::string; | |
54 | ||
7c673cae FG |
// Block-alignment helpers used by clone_range().
//   ALIGNED(x, by)    - true when x is an exact multiple of by
//   ALIGN_DOWN(x, by) - largest multiple of by that is <= x
//   ALIGN_UP(x, by)   - x itself when already aligned, else the next multiple of by
// These are macros: each argument may be evaluated more than once, and by must be non-zero.
#define ALIGNED(x, by) (((x) % (by)) == 0)
#define ALIGN_DOWN(x, by) (((x) / (by)) * (by))
#define ALIGN_UP(x, by) (ALIGNED((x), (by)) ? (x) : (ALIGN_DOWN((x), (by)) + (by)))
58 | ||
59 | BtrfsFileStoreBackend::BtrfsFileStoreBackend(FileStore *fs): | |
60 | GenericFileStoreBackend(fs), has_clone_range(false), | |
61 | has_snap_create(false), has_snap_destroy(false), | |
62 | has_snap_create_v2(false), has_wait_sync(false), stable_commits(false), | |
63 | m_filestore_btrfs_clone_range(cct()->_conf->filestore_btrfs_clone_range), | |
64 | m_filestore_btrfs_snap (cct()->_conf->filestore_btrfs_snap) { } | |
65 | ||
66 | int BtrfsFileStoreBackend::detect_features() | |
67 | { | |
68 | int r; | |
69 | ||
70 | r = GenericFileStoreBackend::detect_features(); | |
71 | if (r < 0) | |
72 | return r; | |
73 | ||
74 | // clone_range? | |
75 | if (m_filestore_btrfs_clone_range) { | |
91327a77 | 76 | int fd = ::openat(get_basedir_fd(), "clone_range_test", O_CREAT|O_WRONLY|O_CLOEXEC, 0600); |
7c673cae FG |
77 | if (fd >= 0) { |
78 | if (::unlinkat(get_basedir_fd(), "clone_range_test", 0) < 0) { | |
79 | r = -errno; | |
80 | dout(0) << "detect_feature: failed to unlink test file for CLONE_RANGE ioctl: " | |
81 | << cpp_strerror(r) << dendl; | |
82 | } | |
83 | btrfs_ioctl_clone_range_args clone_args; | |
84 | memset(&clone_args, 0, sizeof(clone_args)); | |
85 | clone_args.src_fd = -1; | |
86 | r = ::ioctl(fd, BTRFS_IOC_CLONE_RANGE, &clone_args); | |
87 | if (r < 0 && errno == EBADF) { | |
88 | dout(0) << "detect_feature: CLONE_RANGE ioctl is supported" << dendl; | |
89 | has_clone_range = true; | |
90 | } else { | |
91 | r = -errno; | |
92 | dout(0) << "detect_feature: CLONE_RANGE ioctl is NOT supported: " << cpp_strerror(r) << dendl; | |
93 | } | |
94 | TEMP_FAILURE_RETRY(::close(fd)); | |
95 | } else { | |
96 | r = -errno; | |
97 | dout(0) << "detect_feature: failed to create test file for CLONE_RANGE ioctl: " | |
98 | << cpp_strerror(r) << dendl; | |
99 | } | |
100 | } else { | |
101 | dout(0) << "detect_feature: CLONE_RANGE ioctl is DISABLED via 'filestore btrfs clone range' option" << dendl; | |
102 | } | |
103 | ||
104 | struct btrfs_ioctl_vol_args vol_args; | |
105 | memset(&vol_args, 0, sizeof(vol_args)); | |
106 | ||
107 | // create test source volume | |
108 | vol_args.fd = 0; | |
109 | strcpy(vol_args.name, "test_subvol"); | |
110 | r = ::ioctl(get_basedir_fd(), BTRFS_IOC_SUBVOL_CREATE, &vol_args); | |
111 | if (r != 0) { | |
112 | r = -errno; | |
113 | dout(0) << "detect_feature: failed to create simple subvolume " << vol_args.name << ": " << cpp_strerror(r) << dendl; | |
114 | } | |
91327a77 | 115 | int srcfd = ::openat(get_basedir_fd(), vol_args.name, O_RDONLY|O_CLOEXEC); |
7c673cae FG |
116 | if (srcfd < 0) { |
117 | r = -errno; | |
118 | dout(0) << "detect_feature: failed to open " << vol_args.name << ": " << cpp_strerror(r) << dendl; | |
119 | } | |
120 | ||
121 | // snap_create and snap_destroy? | |
122 | vol_args.fd = srcfd; | |
123 | strcpy(vol_args.name, "sync_snap_test"); | |
124 | r = ::ioctl(get_basedir_fd(), BTRFS_IOC_SNAP_CREATE, &vol_args); | |
125 | int err = errno; | |
126 | if (r == 0 || errno == EEXIST) { | |
127 | dout(0) << "detect_feature: SNAP_CREATE is supported" << dendl; | |
128 | has_snap_create = true; | |
129 | ||
130 | r = ::ioctl(get_basedir_fd(), BTRFS_IOC_SNAP_DESTROY, &vol_args); | |
131 | if (r == 0) { | |
132 | dout(0) << "detect_feature: SNAP_DESTROY is supported" << dendl; | |
133 | has_snap_destroy = true; | |
134 | } else { | |
135 | err = -errno; | |
136 | dout(0) << "detect_feature: SNAP_DESTROY failed: " << cpp_strerror(err) << dendl; | |
137 | ||
138 | if (err == -EPERM && getuid() != 0) { | |
139 | dout(0) << "detect_feature: failed with EPERM as non-root; remount with -o user_subvol_rm_allowed" << dendl; | |
140 | cerr << TEXT_YELLOW | |
141 | << "btrfs SNAP_DESTROY failed as non-root; remount with -o user_subvol_rm_allowed" | |
142 | << TEXT_NORMAL << std::endl; | |
143 | } else if (err == -EOPNOTSUPP) { | |
144 | derr << "btrfs SNAP_DESTROY ioctl not supported; you need a kernel newer than 2.6.32" << dendl; | |
145 | } | |
146 | } | |
147 | } else { | |
148 | dout(0) << "detect_feature: SNAP_CREATE failed: " << cpp_strerror(err) << dendl; | |
149 | } | |
150 | ||
151 | if (m_filestore_btrfs_snap) { | |
152 | if (has_snap_destroy) | |
153 | stable_commits = true; | |
154 | else | |
155 | dout(0) << "detect_feature: snaps enabled, but no SNAP_DESTROY ioctl; DISABLING" << dendl; | |
156 | } | |
157 | ||
158 | // start_sync? | |
159 | __u64 transid = 0; | |
160 | r = ::ioctl(get_basedir_fd(), BTRFS_IOC_START_SYNC, &transid); | |
161 | if (r < 0) { | |
162 | int err = errno; | |
163 | dout(0) << "detect_feature: START_SYNC got " << cpp_strerror(err) << dendl; | |
164 | } | |
165 | if (r == 0 && transid > 0) { | |
166 | dout(0) << "detect_feature: START_SYNC is supported (transid " << transid << ")" << dendl; | |
167 | ||
168 | // do we have wait_sync too? | |
169 | r = ::ioctl(get_basedir_fd(), BTRFS_IOC_WAIT_SYNC, &transid); | |
170 | if (r == 0 || errno == ERANGE) { | |
171 | dout(0) << "detect_feature: WAIT_SYNC is supported" << dendl; | |
172 | has_wait_sync = true; | |
173 | } else { | |
174 | int err = errno; | |
175 | dout(0) << "detect_feature: WAIT_SYNC is NOT supported: " << cpp_strerror(err) << dendl; | |
176 | } | |
177 | } else { | |
178 | int err = errno; | |
179 | dout(0) << "detect_feature: START_SYNC is NOT supported: " << cpp_strerror(err) << dendl; | |
180 | } | |
181 | ||
182 | if (has_wait_sync) { | |
183 | // async snap creation? | |
184 | struct btrfs_ioctl_vol_args_v2 async_args; | |
185 | memset(&async_args, 0, sizeof(async_args)); | |
186 | async_args.fd = srcfd; | |
187 | async_args.flags = BTRFS_SUBVOL_CREATE_ASYNC; | |
188 | strcpy(async_args.name, "async_snap_test"); | |
189 | ||
190 | // remove old one, first | |
191 | struct stat st; | |
192 | strcpy(vol_args.name, async_args.name); | |
193 | if (::fstatat(get_basedir_fd(), vol_args.name, &st, 0) == 0) { | |
194 | dout(0) << "detect_feature: removing old async_snap_test" << dendl; | |
195 | r = ::ioctl(get_basedir_fd(), BTRFS_IOC_SNAP_DESTROY, &vol_args); | |
196 | if (r != 0) { | |
197 | int err = errno; | |
198 | dout(0) << "detect_feature: failed to remove old async_snap_test: " << cpp_strerror(err) << dendl; | |
199 | } | |
200 | } | |
201 | ||
202 | r = ::ioctl(get_basedir_fd(), BTRFS_IOC_SNAP_CREATE_V2, &async_args); | |
203 | if (r == 0 || errno == EEXIST) { | |
204 | dout(0) << "detect_feature: SNAP_CREATE_V2 is supported" << dendl; | |
205 | has_snap_create_v2 = true; | |
206 | ||
207 | // clean up | |
208 | strcpy(vol_args.name, "async_snap_test"); | |
209 | r = ::ioctl(get_basedir_fd(), BTRFS_IOC_SNAP_DESTROY, &vol_args); | |
210 | if (r != 0) { | |
211 | int err = errno; | |
212 | dout(0) << "detect_feature: SNAP_DESTROY failed: " << cpp_strerror(err) << dendl; | |
213 | } | |
214 | } else { | |
215 | int err = errno; | |
216 | dout(0) << "detect_feature: SNAP_CREATE_V2 is NOT supported: " << cpp_strerror(err) << dendl; | |
217 | } | |
218 | } | |
219 | ||
220 | // clean up test subvol | |
221 | if (srcfd >= 0) | |
222 | TEMP_FAILURE_RETRY(::close(srcfd)); | |
223 | ||
224 | strcpy(vol_args.name, "test_subvol"); | |
225 | r = ::ioctl(get_basedir_fd(), BTRFS_IOC_SNAP_DESTROY, &vol_args); | |
226 | if (r < 0) { | |
227 | r = -errno; | |
228 | dout(0) << "detect_feature: failed to remove " << vol_args.name << ": " << cpp_strerror(r) << dendl; | |
229 | } | |
230 | ||
231 | if (m_filestore_btrfs_snap && !has_snap_create_v2) { | |
232 | dout(0) << "mount WARNING: btrfs snaps enabled, but no SNAP_CREATE_V2 ioctl (from kernel 2.6.37+)" << dendl; | |
233 | cerr << TEXT_YELLOW | |
234 | << " ** WARNING: 'filestore btrfs snap' is enabled (for safe transactions,\n" | |
235 | << " rollback), but btrfs does not support the SNAP_CREATE_V2 ioctl\n" | |
236 | << " (added in Linux 2.6.37). Expect slow btrfs sync/commit\n" | |
237 | << " performance.\n" | |
238 | << TEXT_NORMAL; | |
239 | } | |
240 | ||
241 | return 0; | |
242 | } | |
243 | ||
244 | bool BtrfsFileStoreBackend::can_checkpoint() | |
245 | { | |
246 | return stable_commits; | |
247 | } | |
248 | ||
249 | int BtrfsFileStoreBackend::create_current() | |
250 | { | |
251 | struct stat st; | |
252 | int ret = ::stat(get_current_path().c_str(), &st); | |
253 | if (ret == 0) { | |
254 | // current/ exists | |
255 | if (!S_ISDIR(st.st_mode)) { | |
256 | dout(0) << "create_current: current/ exists but is not a directory" << dendl; | |
257 | return -EINVAL; | |
258 | } | |
259 | ||
260 | struct stat basest; | |
261 | struct statfs currentfs; | |
262 | ret = ::fstat(get_basedir_fd(), &basest); | |
263 | if (ret < 0) { | |
264 | ret = -errno; | |
265 | dout(0) << "create_current: cannot fstat basedir " << cpp_strerror(ret) << dendl; | |
266 | return ret; | |
267 | } | |
268 | ret = ::statfs(get_current_path().c_str(), ¤tfs); | |
269 | if (ret < 0) { | |
270 | ret = -errno; | |
271 | dout(0) << "create_current: cannot statsf basedir " << cpp_strerror(ret) << dendl; | |
272 | return ret; | |
273 | } | |
274 | if (currentfs.f_type == BTRFS_SUPER_MAGIC && basest.st_dev != st.st_dev) { | |
275 | dout(2) << "create_current: current appears to be a btrfs subvolume" << dendl; | |
276 | stable_commits = true; | |
277 | } | |
278 | return 0; | |
279 | } | |
280 | ||
281 | struct btrfs_ioctl_vol_args volargs; | |
282 | memset(&volargs, 0, sizeof(volargs)); | |
283 | ||
284 | volargs.fd = 0; | |
285 | strcpy(volargs.name, "current"); | |
286 | if (::ioctl(get_basedir_fd(), BTRFS_IOC_SUBVOL_CREATE, (unsigned long int)&volargs) < 0) { | |
287 | ret = -errno; | |
288 | dout(0) << "create_current: BTRFS_IOC_SUBVOL_CREATE failed with error " | |
289 | << cpp_strerror(ret) << dendl; | |
290 | return ret; | |
291 | } | |
292 | ||
293 | dout(2) << "create_current: created btrfs subvol " << get_current_path() << dendl; | |
294 | if (::chmod(get_current_path().c_str(), 0755) < 0) { | |
295 | ret = -errno; | |
296 | dout(0) << "create_current: failed to chmod " << get_current_path() << " to 0755: " | |
297 | << cpp_strerror(ret) << dendl; | |
298 | return ret; | |
299 | } | |
300 | ||
301 | stable_commits = true; | |
302 | return 0; | |
303 | } | |
304 | ||
305 | int BtrfsFileStoreBackend::list_checkpoints(list<string>& ls) | |
306 | { | |
307 | int ret, err = 0; | |
308 | ||
309 | struct stat basest; | |
310 | ret = ::fstat(get_basedir_fd(), &basest); | |
311 | if (ret < 0) { | |
312 | ret = -errno; | |
313 | dout(0) << "list_checkpoints: cannot fstat basedir " << cpp_strerror(ret) << dendl; | |
314 | return ret; | |
315 | } | |
316 | ||
317 | // get snap list | |
318 | DIR *dir = ::opendir(get_basedir_path().c_str()); | |
319 | if (!dir) { | |
320 | ret = -errno; | |
321 | dout(0) << "list_checkpoints: opendir '" << get_basedir_path() << "' failed: " | |
322 | << cpp_strerror(ret) << dendl; | |
323 | return ret; | |
324 | } | |
325 | ||
326 | list<string> snaps; | |
327 | char path[PATH_MAX]; | |
328 | struct dirent *de; | |
329 | while ((de = ::readdir(dir))) { | |
330 | snprintf(path, sizeof(path), "%s/%s", get_basedir_path().c_str(), de->d_name); | |
331 | ||
332 | struct stat st; | |
333 | ret = ::stat(path, &st); | |
334 | if (ret < 0) { | |
335 | err = -errno; | |
336 | dout(0) << "list_checkpoints: stat '" << path << "' failed: " | |
337 | << cpp_strerror(err) << dendl; | |
338 | break; | |
339 | } | |
340 | ||
341 | if (!S_ISDIR(st.st_mode)) | |
342 | continue; | |
343 | ||
344 | struct statfs fs; | |
345 | ret = ::statfs(path, &fs); | |
346 | if (ret < 0) { | |
347 | err = -errno; | |
348 | dout(0) << "list_checkpoints: statfs '" << path << "' failed: " | |
349 | << cpp_strerror(err) << dendl; | |
350 | break; | |
351 | } | |
352 | ||
353 | if (fs.f_type == BTRFS_SUPER_MAGIC && basest.st_dev != st.st_dev) | |
354 | snaps.push_back(string(de->d_name)); | |
355 | } | |
356 | ||
357 | if (::closedir(dir) < 0) { | |
358 | ret = -errno; | |
359 | dout(0) << "list_checkpoints: closedir failed: " << cpp_strerror(ret) << dendl; | |
360 | if (!err) | |
361 | err = ret; | |
362 | } | |
363 | ||
364 | if (err) | |
365 | return err; | |
366 | ||
367 | ls.swap(snaps); | |
368 | return 0; | |
369 | } | |
370 | ||
371 | int BtrfsFileStoreBackend::create_checkpoint(const string& name, uint64_t *transid) | |
372 | { | |
373 | dout(10) << "create_checkpoint: '" << name << "'" << dendl; | |
374 | if (has_snap_create_v2 && transid) { | |
375 | struct btrfs_ioctl_vol_args_v2 async_args; | |
376 | memset(&async_args, 0, sizeof(async_args)); | |
377 | async_args.fd = get_current_fd(); | |
378 | async_args.flags = BTRFS_SUBVOL_CREATE_ASYNC; | |
379 | ||
380 | size_t name_size = sizeof(async_args.name); | |
381 | strncpy(async_args.name, name.c_str(), name_size); | |
382 | async_args.name[name_size-1] = '\0'; | |
383 | ||
384 | int r = ::ioctl(get_basedir_fd(), BTRFS_IOC_SNAP_CREATE_V2, &async_args); | |
385 | if (r < 0) { | |
386 | r = -errno; | |
387 | dout(0) << "create_checkpoint: async snap create '" << name << "' got " << cpp_strerror(r) << dendl; | |
388 | return r; | |
389 | } | |
390 | dout(20) << "create_checkpoint: async snap create '" << name << "' transid " << async_args.transid << dendl; | |
391 | *transid = async_args.transid; | |
392 | } else { | |
393 | struct btrfs_ioctl_vol_args vol_args; | |
394 | memset(&vol_args, 0, sizeof(vol_args)); | |
395 | vol_args.fd = get_current_fd(); | |
396 | ||
397 | size_t name_size = sizeof(vol_args.name); | |
398 | strncpy(vol_args.name, name.c_str(), name_size); | |
399 | vol_args.name[name_size-1] = '\0'; | |
400 | ||
401 | int r = ::ioctl(get_basedir_fd(), BTRFS_IOC_SNAP_CREATE, &vol_args); | |
402 | if (r < 0) { | |
403 | r = -errno; | |
404 | dout(0) << "create_checkpoint: snap create '" << name << "' got " << cpp_strerror(r) << dendl; | |
405 | return r; | |
406 | } | |
407 | if (transid) | |
408 | *transid = 0; | |
409 | } | |
410 | return 0; | |
411 | } | |
412 | ||
413 | int BtrfsFileStoreBackend::sync_checkpoint(uint64_t transid) | |
414 | { | |
415 | // wait for commit | |
416 | dout(10) << "sync_checkpoint: transid " << transid << " to complete" << dendl; | |
417 | int ret = ::ioctl(get_op_fd(), BTRFS_IOC_WAIT_SYNC, &transid); | |
418 | if (ret < 0) { | |
419 | ret = -errno; | |
420 | dout(0) << "sync_checkpoint: ioctl WAIT_SYNC got " << cpp_strerror(ret) << dendl; | |
421 | return -errno; | |
422 | } | |
423 | dout(20) << "sync_checkpoint: done waiting for transid " << transid << dendl; | |
424 | return 0; | |
425 | } | |
426 | ||
427 | int BtrfsFileStoreBackend::rollback_to(const string& name) | |
428 | { | |
429 | dout(10) << "rollback_to: to '" << name << "'" << dendl; | |
430 | char s[PATH_MAX]; | |
431 | btrfs_ioctl_vol_args vol_args; | |
432 | ||
433 | memset(&vol_args, 0, sizeof(vol_args)); | |
434 | vol_args.fd = 0; | |
435 | strcpy(vol_args.name, "current"); | |
436 | ||
437 | int ret = ::ioctl(get_basedir_fd(), BTRFS_IOC_SNAP_DESTROY, &vol_args); | |
438 | if (ret && errno != ENOENT) { | |
439 | dout(0) << "rollback_to: error removing old current subvol: " << cpp_strerror(ret) << dendl; | |
440 | snprintf(s, sizeof(s), "%s/current.remove.me.%d", get_basedir_path().c_str(), rand()); | |
441 | if (::rename(get_current_path().c_str(), s)) { | |
442 | ret = -errno; | |
443 | dout(0) << "rollback_to: error renaming old current subvol: " | |
444 | << cpp_strerror(ret) << dendl; | |
445 | return ret; | |
446 | } | |
447 | } | |
448 | ||
449 | snprintf(s, sizeof(s), "%s/%s", get_basedir_path().c_str(), name.c_str()); | |
450 | ||
451 | // roll back | |
91327a77 | 452 | vol_args.fd = ::open(s, O_RDONLY|O_CLOEXEC); |
7c673cae FG |
453 | if (vol_args.fd < 0) { |
454 | ret = -errno; | |
455 | dout(0) << "rollback_to: error opening '" << s << "': " << cpp_strerror(ret) << dendl; | |
456 | return ret; | |
457 | } | |
458 | ret = ::ioctl(get_basedir_fd(), BTRFS_IOC_SNAP_CREATE, &vol_args); | |
459 | if (ret < 0 ) { | |
460 | ret = -errno; | |
461 | dout(0) << "rollback_to: ioctl SNAP_CREATE got " << cpp_strerror(ret) << dendl; | |
462 | } | |
463 | TEMP_FAILURE_RETRY(::close(vol_args.fd)); | |
464 | return ret; | |
465 | } | |
466 | ||
467 | int BtrfsFileStoreBackend::destroy_checkpoint(const string& name) | |
468 | { | |
469 | dout(10) << "destroy_checkpoint: '" << name << "'" << dendl; | |
470 | btrfs_ioctl_vol_args vol_args; | |
471 | memset(&vol_args, 0, sizeof(vol_args)); | |
472 | vol_args.fd = 0; | |
9f95a23c TL |
473 | strncpy(vol_args.name, name.c_str(), sizeof(vol_args.name) - 1); |
474 | vol_args.name[sizeof(vol_args.name) - 1] = '\0'; | |
7c673cae FG |
475 | |
476 | int ret = ::ioctl(get_basedir_fd(), BTRFS_IOC_SNAP_DESTROY, &vol_args); | |
477 | if (ret) { | |
478 | ret = -errno; | |
479 | dout(0) << "destroy_checkpoint: ioctl SNAP_DESTROY got " << cpp_strerror(ret) << dendl; | |
480 | return ret; | |
481 | } | |
482 | return 0; | |
483 | } | |
484 | ||
485 | int BtrfsFileStoreBackend::syncfs() | |
486 | { | |
487 | dout(15) << "syncfs" << dendl; | |
488 | // do a full btrfs commit | |
489 | int ret = ::ioctl(get_op_fd(), BTRFS_IOC_SYNC); | |
490 | if (ret < 0) { | |
491 | ret = -errno; | |
492 | dout(0) << "syncfs: btrfs IOC_SYNC got " << cpp_strerror(ret) << dendl; | |
493 | } | |
494 | return ret; | |
495 | } | |
496 | ||
497 | int BtrfsFileStoreBackend::clone_range(int from, int to, uint64_t srcoff, uint64_t len, uint64_t dstoff) | |
498 | { | |
499 | dout(20) << "clone_range: " << srcoff << "~" << len << " to " << dstoff << dendl; | |
500 | size_t blk_size = get_blksize(); | |
501 | if (!has_clone_range || | |
502 | srcoff % blk_size != dstoff % blk_size) { | |
503 | dout(20) << "clone_range: using copy" << dendl; | |
504 | return _copy_range(from, to, srcoff, len, dstoff); | |
505 | } | |
506 | ||
507 | int err = 0; | |
508 | int r = 0; | |
509 | ||
510 | uint64_t srcoffclone = ALIGN_UP(srcoff, blk_size); | |
511 | uint64_t dstoffclone = ALIGN_UP(dstoff, blk_size); | |
512 | if (srcoffclone >= srcoff + len) { | |
513 | dout(20) << "clone_range: using copy, extent too short to align srcoff" << dendl; | |
514 | return _copy_range(from, to, srcoff, len, dstoff); | |
515 | } | |
516 | ||
517 | uint64_t lenclone = len - (srcoffclone - srcoff); | |
518 | if (!ALIGNED(lenclone, blk_size)) { | |
519 | struct stat from_stat, to_stat; | |
520 | err = ::fstat(from, &from_stat); | |
521 | if (err) return -errno; | |
522 | err = ::fstat(to , &to_stat); | |
523 | if (err) return -errno; | |
524 | ||
525 | if (srcoff + len != (uint64_t)from_stat.st_size || | |
526 | dstoff + len < (uint64_t)to_stat.st_size) { | |
527 | // Not to the end of the file, need to align length as well | |
528 | lenclone = ALIGN_DOWN(lenclone, blk_size); | |
529 | } | |
530 | } | |
531 | if (lenclone == 0) { | |
532 | // too short | |
533 | return _copy_range(from, to, srcoff, len, dstoff); | |
534 | } | |
535 | ||
536 | dout(20) << "clone_range: cloning " << srcoffclone << "~" << lenclone | |
537 | << " to " << dstoffclone << " = " << r << dendl; | |
538 | btrfs_ioctl_clone_range_args a; | |
539 | a.src_fd = from; | |
540 | a.src_offset = srcoffclone; | |
541 | a.src_length = lenclone; | |
542 | a.dest_offset = dstoffclone; | |
543 | err = ::ioctl(to, BTRFS_IOC_CLONE_RANGE, &a); | |
544 | if (err >= 0) { | |
545 | r += err; | |
546 | } else if (errno == EINVAL) { | |
547 | // Still failed, might be compressed | |
548 | dout(20) << "clone_range: failed CLONE_RANGE call with -EINVAL, using copy" << dendl; | |
549 | return _copy_range(from, to, srcoff, len, dstoff); | |
550 | } else { | |
551 | return -errno; | |
552 | } | |
553 | ||
554 | // Take care any trimmed from front | |
555 | if (srcoffclone != srcoff) { | |
556 | err = _copy_range(from, to, srcoff, srcoffclone - srcoff, dstoff); | |
557 | if (err >= 0) { | |
558 | r += err; | |
559 | } else { | |
560 | return err; | |
561 | } | |
562 | } | |
563 | ||
564 | // Copy end | |
565 | if (srcoffclone + lenclone != srcoff + len) { | |
566 | err = _copy_range(from, to, | |
567 | srcoffclone + lenclone, | |
568 | (srcoff + len) - (srcoffclone + lenclone), | |
569 | dstoffclone + lenclone); | |
570 | if (err >= 0) { | |
571 | r += err; | |
572 | } else { | |
573 | return err; | |
574 | } | |
575 | } | |
576 | dout(20) << "clone_range: finished " << srcoff << "~" << len | |
577 | << " to " << dstoff << " = " << r << dendl; | |
578 | return r; | |
579 | } | |
580 | #endif |