]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #include "include/int_types.h" | |
16 | #include "include/types.h" | |
17 | ||
18 | #include <unistd.h> | |
19 | #include <fcntl.h> | |
20 | #include <errno.h> | |
21 | #include <stdlib.h> | |
22 | #include <sys/types.h> | |
23 | #include <sys/stat.h> | |
24 | #include <sys/ioctl.h> | |
25 | #include "include/compat.h" | |
26 | #include "include/linux_fiemap.h" | |
27 | #include "include/color.h" | |
28 | #include "include/buffer.h" | |
11fdf7f2 | 29 | #include "include/ceph_assert.h" |
7c673cae FG |
30 | |
31 | #ifndef __CYGWIN__ | |
32 | #include "os/fs/btrfs_ioctl.h" | |
33 | #endif | |
34 | ||
35 | #include <iostream> | |
36 | #include <fstream> | |
37 | #include <sstream> | |
38 | ||
39 | #include "BtrfsFileStoreBackend.h" | |
40 | ||
41 | #include "common/errno.h" | |
42 | #include "common/config.h" | |
43 | ||
44 | #if defined(__linux__) | |
45 | ||
46 | #define dout_context cct() | |
47 | #define dout_subsys ceph_subsys_filestore | |
48 | #undef dout_prefix | |
49 | #define dout_prefix *_dout << "btrfsfilestorebackend(" << get_basedir_path() << ") " | |
50 | ||
// Block-alignment helpers for unsigned offsets and lengths.
// Note: each macro may evaluate its arguments more than once, so do not pass
// expressions with side effects.

// Round x down to the nearest multiple of `by`.
#define ALIGN_DOWN(x, by) ((x) - ((x) % (by)))
// True when x is an exact multiple of `by`.
#define ALIGNED(x, by) (((x) % (by)) == 0)
// Round x up to the nearest multiple of `by` (x itself if already aligned).
#define ALIGN_UP(x, by) (ALIGNED((x), (by)) ? (x) : (((x) - ((x) % (by))) + (by)))
54 | ||
55 | BtrfsFileStoreBackend::BtrfsFileStoreBackend(FileStore *fs): | |
56 | GenericFileStoreBackend(fs), has_clone_range(false), | |
57 | has_snap_create(false), has_snap_destroy(false), | |
58 | has_snap_create_v2(false), has_wait_sync(false), stable_commits(false), | |
59 | m_filestore_btrfs_clone_range(cct()->_conf->filestore_btrfs_clone_range), | |
60 | m_filestore_btrfs_snap (cct()->_conf->filestore_btrfs_snap) { } | |
61 | ||
62 | int BtrfsFileStoreBackend::detect_features() | |
63 | { | |
64 | int r; | |
65 | ||
66 | r = GenericFileStoreBackend::detect_features(); | |
67 | if (r < 0) | |
68 | return r; | |
69 | ||
70 | // clone_range? | |
71 | if (m_filestore_btrfs_clone_range) { | |
91327a77 | 72 | int fd = ::openat(get_basedir_fd(), "clone_range_test", O_CREAT|O_WRONLY|O_CLOEXEC, 0600); |
7c673cae FG |
73 | if (fd >= 0) { |
74 | if (::unlinkat(get_basedir_fd(), "clone_range_test", 0) < 0) { | |
75 | r = -errno; | |
76 | dout(0) << "detect_feature: failed to unlink test file for CLONE_RANGE ioctl: " | |
77 | << cpp_strerror(r) << dendl; | |
78 | } | |
79 | btrfs_ioctl_clone_range_args clone_args; | |
80 | memset(&clone_args, 0, sizeof(clone_args)); | |
81 | clone_args.src_fd = -1; | |
82 | r = ::ioctl(fd, BTRFS_IOC_CLONE_RANGE, &clone_args); | |
83 | if (r < 0 && errno == EBADF) { | |
84 | dout(0) << "detect_feature: CLONE_RANGE ioctl is supported" << dendl; | |
85 | has_clone_range = true; | |
86 | } else { | |
87 | r = -errno; | |
88 | dout(0) << "detect_feature: CLONE_RANGE ioctl is NOT supported: " << cpp_strerror(r) << dendl; | |
89 | } | |
90 | TEMP_FAILURE_RETRY(::close(fd)); | |
91 | } else { | |
92 | r = -errno; | |
93 | dout(0) << "detect_feature: failed to create test file for CLONE_RANGE ioctl: " | |
94 | << cpp_strerror(r) << dendl; | |
95 | } | |
96 | } else { | |
97 | dout(0) << "detect_feature: CLONE_RANGE ioctl is DISABLED via 'filestore btrfs clone range' option" << dendl; | |
98 | } | |
99 | ||
100 | struct btrfs_ioctl_vol_args vol_args; | |
101 | memset(&vol_args, 0, sizeof(vol_args)); | |
102 | ||
103 | // create test source volume | |
104 | vol_args.fd = 0; | |
105 | strcpy(vol_args.name, "test_subvol"); | |
106 | r = ::ioctl(get_basedir_fd(), BTRFS_IOC_SUBVOL_CREATE, &vol_args); | |
107 | if (r != 0) { | |
108 | r = -errno; | |
109 | dout(0) << "detect_feature: failed to create simple subvolume " << vol_args.name << ": " << cpp_strerror(r) << dendl; | |
110 | } | |
91327a77 | 111 | int srcfd = ::openat(get_basedir_fd(), vol_args.name, O_RDONLY|O_CLOEXEC); |
7c673cae FG |
112 | if (srcfd < 0) { |
113 | r = -errno; | |
114 | dout(0) << "detect_feature: failed to open " << vol_args.name << ": " << cpp_strerror(r) << dendl; | |
115 | } | |
116 | ||
117 | // snap_create and snap_destroy? | |
118 | vol_args.fd = srcfd; | |
119 | strcpy(vol_args.name, "sync_snap_test"); | |
120 | r = ::ioctl(get_basedir_fd(), BTRFS_IOC_SNAP_CREATE, &vol_args); | |
121 | int err = errno; | |
122 | if (r == 0 || errno == EEXIST) { | |
123 | dout(0) << "detect_feature: SNAP_CREATE is supported" << dendl; | |
124 | has_snap_create = true; | |
125 | ||
126 | r = ::ioctl(get_basedir_fd(), BTRFS_IOC_SNAP_DESTROY, &vol_args); | |
127 | if (r == 0) { | |
128 | dout(0) << "detect_feature: SNAP_DESTROY is supported" << dendl; | |
129 | has_snap_destroy = true; | |
130 | } else { | |
131 | err = -errno; | |
132 | dout(0) << "detect_feature: SNAP_DESTROY failed: " << cpp_strerror(err) << dendl; | |
133 | ||
134 | if (err == -EPERM && getuid() != 0) { | |
135 | dout(0) << "detect_feature: failed with EPERM as non-root; remount with -o user_subvol_rm_allowed" << dendl; | |
136 | cerr << TEXT_YELLOW | |
137 | << "btrfs SNAP_DESTROY failed as non-root; remount with -o user_subvol_rm_allowed" | |
138 | << TEXT_NORMAL << std::endl; | |
139 | } else if (err == -EOPNOTSUPP) { | |
140 | derr << "btrfs SNAP_DESTROY ioctl not supported; you need a kernel newer than 2.6.32" << dendl; | |
141 | } | |
142 | } | |
143 | } else { | |
144 | dout(0) << "detect_feature: SNAP_CREATE failed: " << cpp_strerror(err) << dendl; | |
145 | } | |
146 | ||
147 | if (m_filestore_btrfs_snap) { | |
148 | if (has_snap_destroy) | |
149 | stable_commits = true; | |
150 | else | |
151 | dout(0) << "detect_feature: snaps enabled, but no SNAP_DESTROY ioctl; DISABLING" << dendl; | |
152 | } | |
153 | ||
154 | // start_sync? | |
155 | __u64 transid = 0; | |
156 | r = ::ioctl(get_basedir_fd(), BTRFS_IOC_START_SYNC, &transid); | |
157 | if (r < 0) { | |
158 | int err = errno; | |
159 | dout(0) << "detect_feature: START_SYNC got " << cpp_strerror(err) << dendl; | |
160 | } | |
161 | if (r == 0 && transid > 0) { | |
162 | dout(0) << "detect_feature: START_SYNC is supported (transid " << transid << ")" << dendl; | |
163 | ||
164 | // do we have wait_sync too? | |
165 | r = ::ioctl(get_basedir_fd(), BTRFS_IOC_WAIT_SYNC, &transid); | |
166 | if (r == 0 || errno == ERANGE) { | |
167 | dout(0) << "detect_feature: WAIT_SYNC is supported" << dendl; | |
168 | has_wait_sync = true; | |
169 | } else { | |
170 | int err = errno; | |
171 | dout(0) << "detect_feature: WAIT_SYNC is NOT supported: " << cpp_strerror(err) << dendl; | |
172 | } | |
173 | } else { | |
174 | int err = errno; | |
175 | dout(0) << "detect_feature: START_SYNC is NOT supported: " << cpp_strerror(err) << dendl; | |
176 | } | |
177 | ||
178 | if (has_wait_sync) { | |
179 | // async snap creation? | |
180 | struct btrfs_ioctl_vol_args_v2 async_args; | |
181 | memset(&async_args, 0, sizeof(async_args)); | |
182 | async_args.fd = srcfd; | |
183 | async_args.flags = BTRFS_SUBVOL_CREATE_ASYNC; | |
184 | strcpy(async_args.name, "async_snap_test"); | |
185 | ||
186 | // remove old one, first | |
187 | struct stat st; | |
188 | strcpy(vol_args.name, async_args.name); | |
189 | if (::fstatat(get_basedir_fd(), vol_args.name, &st, 0) == 0) { | |
190 | dout(0) << "detect_feature: removing old async_snap_test" << dendl; | |
191 | r = ::ioctl(get_basedir_fd(), BTRFS_IOC_SNAP_DESTROY, &vol_args); | |
192 | if (r != 0) { | |
193 | int err = errno; | |
194 | dout(0) << "detect_feature: failed to remove old async_snap_test: " << cpp_strerror(err) << dendl; | |
195 | } | |
196 | } | |
197 | ||
198 | r = ::ioctl(get_basedir_fd(), BTRFS_IOC_SNAP_CREATE_V2, &async_args); | |
199 | if (r == 0 || errno == EEXIST) { | |
200 | dout(0) << "detect_feature: SNAP_CREATE_V2 is supported" << dendl; | |
201 | has_snap_create_v2 = true; | |
202 | ||
203 | // clean up | |
204 | strcpy(vol_args.name, "async_snap_test"); | |
205 | r = ::ioctl(get_basedir_fd(), BTRFS_IOC_SNAP_DESTROY, &vol_args); | |
206 | if (r != 0) { | |
207 | int err = errno; | |
208 | dout(0) << "detect_feature: SNAP_DESTROY failed: " << cpp_strerror(err) << dendl; | |
209 | } | |
210 | } else { | |
211 | int err = errno; | |
212 | dout(0) << "detect_feature: SNAP_CREATE_V2 is NOT supported: " << cpp_strerror(err) << dendl; | |
213 | } | |
214 | } | |
215 | ||
216 | // clean up test subvol | |
217 | if (srcfd >= 0) | |
218 | TEMP_FAILURE_RETRY(::close(srcfd)); | |
219 | ||
220 | strcpy(vol_args.name, "test_subvol"); | |
221 | r = ::ioctl(get_basedir_fd(), BTRFS_IOC_SNAP_DESTROY, &vol_args); | |
222 | if (r < 0) { | |
223 | r = -errno; | |
224 | dout(0) << "detect_feature: failed to remove " << vol_args.name << ": " << cpp_strerror(r) << dendl; | |
225 | } | |
226 | ||
227 | if (m_filestore_btrfs_snap && !has_snap_create_v2) { | |
228 | dout(0) << "mount WARNING: btrfs snaps enabled, but no SNAP_CREATE_V2 ioctl (from kernel 2.6.37+)" << dendl; | |
229 | cerr << TEXT_YELLOW | |
230 | << " ** WARNING: 'filestore btrfs snap' is enabled (for safe transactions,\n" | |
231 | << " rollback), but btrfs does not support the SNAP_CREATE_V2 ioctl\n" | |
232 | << " (added in Linux 2.6.37). Expect slow btrfs sync/commit\n" | |
233 | << " performance.\n" | |
234 | << TEXT_NORMAL; | |
235 | } | |
236 | ||
237 | return 0; | |
238 | } | |
239 | ||
240 | bool BtrfsFileStoreBackend::can_checkpoint() | |
241 | { | |
242 | return stable_commits; | |
243 | } | |
244 | ||
245 | int BtrfsFileStoreBackend::create_current() | |
246 | { | |
247 | struct stat st; | |
248 | int ret = ::stat(get_current_path().c_str(), &st); | |
249 | if (ret == 0) { | |
250 | // current/ exists | |
251 | if (!S_ISDIR(st.st_mode)) { | |
252 | dout(0) << "create_current: current/ exists but is not a directory" << dendl; | |
253 | return -EINVAL; | |
254 | } | |
255 | ||
256 | struct stat basest; | |
257 | struct statfs currentfs; | |
258 | ret = ::fstat(get_basedir_fd(), &basest); | |
259 | if (ret < 0) { | |
260 | ret = -errno; | |
261 | dout(0) << "create_current: cannot fstat basedir " << cpp_strerror(ret) << dendl; | |
262 | return ret; | |
263 | } | |
264 | ret = ::statfs(get_current_path().c_str(), ¤tfs); | |
265 | if (ret < 0) { | |
266 | ret = -errno; | |
267 | dout(0) << "create_current: cannot statsf basedir " << cpp_strerror(ret) << dendl; | |
268 | return ret; | |
269 | } | |
270 | if (currentfs.f_type == BTRFS_SUPER_MAGIC && basest.st_dev != st.st_dev) { | |
271 | dout(2) << "create_current: current appears to be a btrfs subvolume" << dendl; | |
272 | stable_commits = true; | |
273 | } | |
274 | return 0; | |
275 | } | |
276 | ||
277 | struct btrfs_ioctl_vol_args volargs; | |
278 | memset(&volargs, 0, sizeof(volargs)); | |
279 | ||
280 | volargs.fd = 0; | |
281 | strcpy(volargs.name, "current"); | |
282 | if (::ioctl(get_basedir_fd(), BTRFS_IOC_SUBVOL_CREATE, (unsigned long int)&volargs) < 0) { | |
283 | ret = -errno; | |
284 | dout(0) << "create_current: BTRFS_IOC_SUBVOL_CREATE failed with error " | |
285 | << cpp_strerror(ret) << dendl; | |
286 | return ret; | |
287 | } | |
288 | ||
289 | dout(2) << "create_current: created btrfs subvol " << get_current_path() << dendl; | |
290 | if (::chmod(get_current_path().c_str(), 0755) < 0) { | |
291 | ret = -errno; | |
292 | dout(0) << "create_current: failed to chmod " << get_current_path() << " to 0755: " | |
293 | << cpp_strerror(ret) << dendl; | |
294 | return ret; | |
295 | } | |
296 | ||
297 | stable_commits = true; | |
298 | return 0; | |
299 | } | |
300 | ||
301 | int BtrfsFileStoreBackend::list_checkpoints(list<string>& ls) | |
302 | { | |
303 | int ret, err = 0; | |
304 | ||
305 | struct stat basest; | |
306 | ret = ::fstat(get_basedir_fd(), &basest); | |
307 | if (ret < 0) { | |
308 | ret = -errno; | |
309 | dout(0) << "list_checkpoints: cannot fstat basedir " << cpp_strerror(ret) << dendl; | |
310 | return ret; | |
311 | } | |
312 | ||
313 | // get snap list | |
314 | DIR *dir = ::opendir(get_basedir_path().c_str()); | |
315 | if (!dir) { | |
316 | ret = -errno; | |
317 | dout(0) << "list_checkpoints: opendir '" << get_basedir_path() << "' failed: " | |
318 | << cpp_strerror(ret) << dendl; | |
319 | return ret; | |
320 | } | |
321 | ||
322 | list<string> snaps; | |
323 | char path[PATH_MAX]; | |
324 | struct dirent *de; | |
325 | while ((de = ::readdir(dir))) { | |
326 | snprintf(path, sizeof(path), "%s/%s", get_basedir_path().c_str(), de->d_name); | |
327 | ||
328 | struct stat st; | |
329 | ret = ::stat(path, &st); | |
330 | if (ret < 0) { | |
331 | err = -errno; | |
332 | dout(0) << "list_checkpoints: stat '" << path << "' failed: " | |
333 | << cpp_strerror(err) << dendl; | |
334 | break; | |
335 | } | |
336 | ||
337 | if (!S_ISDIR(st.st_mode)) | |
338 | continue; | |
339 | ||
340 | struct statfs fs; | |
341 | ret = ::statfs(path, &fs); | |
342 | if (ret < 0) { | |
343 | err = -errno; | |
344 | dout(0) << "list_checkpoints: statfs '" << path << "' failed: " | |
345 | << cpp_strerror(err) << dendl; | |
346 | break; | |
347 | } | |
348 | ||
349 | if (fs.f_type == BTRFS_SUPER_MAGIC && basest.st_dev != st.st_dev) | |
350 | snaps.push_back(string(de->d_name)); | |
351 | } | |
352 | ||
353 | if (::closedir(dir) < 0) { | |
354 | ret = -errno; | |
355 | dout(0) << "list_checkpoints: closedir failed: " << cpp_strerror(ret) << dendl; | |
356 | if (!err) | |
357 | err = ret; | |
358 | } | |
359 | ||
360 | if (err) | |
361 | return err; | |
362 | ||
363 | ls.swap(snaps); | |
364 | return 0; | |
365 | } | |
366 | ||
367 | int BtrfsFileStoreBackend::create_checkpoint(const string& name, uint64_t *transid) | |
368 | { | |
369 | dout(10) << "create_checkpoint: '" << name << "'" << dendl; | |
370 | if (has_snap_create_v2 && transid) { | |
371 | struct btrfs_ioctl_vol_args_v2 async_args; | |
372 | memset(&async_args, 0, sizeof(async_args)); | |
373 | async_args.fd = get_current_fd(); | |
374 | async_args.flags = BTRFS_SUBVOL_CREATE_ASYNC; | |
375 | ||
376 | size_t name_size = sizeof(async_args.name); | |
377 | strncpy(async_args.name, name.c_str(), name_size); | |
378 | async_args.name[name_size-1] = '\0'; | |
379 | ||
380 | int r = ::ioctl(get_basedir_fd(), BTRFS_IOC_SNAP_CREATE_V2, &async_args); | |
381 | if (r < 0) { | |
382 | r = -errno; | |
383 | dout(0) << "create_checkpoint: async snap create '" << name << "' got " << cpp_strerror(r) << dendl; | |
384 | return r; | |
385 | } | |
386 | dout(20) << "create_checkpoint: async snap create '" << name << "' transid " << async_args.transid << dendl; | |
387 | *transid = async_args.transid; | |
388 | } else { | |
389 | struct btrfs_ioctl_vol_args vol_args; | |
390 | memset(&vol_args, 0, sizeof(vol_args)); | |
391 | vol_args.fd = get_current_fd(); | |
392 | ||
393 | size_t name_size = sizeof(vol_args.name); | |
394 | strncpy(vol_args.name, name.c_str(), name_size); | |
395 | vol_args.name[name_size-1] = '\0'; | |
396 | ||
397 | int r = ::ioctl(get_basedir_fd(), BTRFS_IOC_SNAP_CREATE, &vol_args); | |
398 | if (r < 0) { | |
399 | r = -errno; | |
400 | dout(0) << "create_checkpoint: snap create '" << name << "' got " << cpp_strerror(r) << dendl; | |
401 | return r; | |
402 | } | |
403 | if (transid) | |
404 | *transid = 0; | |
405 | } | |
406 | return 0; | |
407 | } | |
408 | ||
409 | int BtrfsFileStoreBackend::sync_checkpoint(uint64_t transid) | |
410 | { | |
411 | // wait for commit | |
412 | dout(10) << "sync_checkpoint: transid " << transid << " to complete" << dendl; | |
413 | int ret = ::ioctl(get_op_fd(), BTRFS_IOC_WAIT_SYNC, &transid); | |
414 | if (ret < 0) { | |
415 | ret = -errno; | |
416 | dout(0) << "sync_checkpoint: ioctl WAIT_SYNC got " << cpp_strerror(ret) << dendl; | |
417 | return -errno; | |
418 | } | |
419 | dout(20) << "sync_checkpoint: done waiting for transid " << transid << dendl; | |
420 | return 0; | |
421 | } | |
422 | ||
423 | int BtrfsFileStoreBackend::rollback_to(const string& name) | |
424 | { | |
425 | dout(10) << "rollback_to: to '" << name << "'" << dendl; | |
426 | char s[PATH_MAX]; | |
427 | btrfs_ioctl_vol_args vol_args; | |
428 | ||
429 | memset(&vol_args, 0, sizeof(vol_args)); | |
430 | vol_args.fd = 0; | |
431 | strcpy(vol_args.name, "current"); | |
432 | ||
433 | int ret = ::ioctl(get_basedir_fd(), BTRFS_IOC_SNAP_DESTROY, &vol_args); | |
434 | if (ret && errno != ENOENT) { | |
435 | dout(0) << "rollback_to: error removing old current subvol: " << cpp_strerror(ret) << dendl; | |
436 | snprintf(s, sizeof(s), "%s/current.remove.me.%d", get_basedir_path().c_str(), rand()); | |
437 | if (::rename(get_current_path().c_str(), s)) { | |
438 | ret = -errno; | |
439 | dout(0) << "rollback_to: error renaming old current subvol: " | |
440 | << cpp_strerror(ret) << dendl; | |
441 | return ret; | |
442 | } | |
443 | } | |
444 | ||
445 | snprintf(s, sizeof(s), "%s/%s", get_basedir_path().c_str(), name.c_str()); | |
446 | ||
447 | // roll back | |
91327a77 | 448 | vol_args.fd = ::open(s, O_RDONLY|O_CLOEXEC); |
7c673cae FG |
449 | if (vol_args.fd < 0) { |
450 | ret = -errno; | |
451 | dout(0) << "rollback_to: error opening '" << s << "': " << cpp_strerror(ret) << dendl; | |
452 | return ret; | |
453 | } | |
454 | ret = ::ioctl(get_basedir_fd(), BTRFS_IOC_SNAP_CREATE, &vol_args); | |
455 | if (ret < 0 ) { | |
456 | ret = -errno; | |
457 | dout(0) << "rollback_to: ioctl SNAP_CREATE got " << cpp_strerror(ret) << dendl; | |
458 | } | |
459 | TEMP_FAILURE_RETRY(::close(vol_args.fd)); | |
460 | return ret; | |
461 | } | |
462 | ||
463 | int BtrfsFileStoreBackend::destroy_checkpoint(const string& name) | |
464 | { | |
465 | dout(10) << "destroy_checkpoint: '" << name << "'" << dendl; | |
466 | btrfs_ioctl_vol_args vol_args; | |
467 | memset(&vol_args, 0, sizeof(vol_args)); | |
468 | vol_args.fd = 0; | |
9f95a23c TL |
469 | strncpy(vol_args.name, name.c_str(), sizeof(vol_args.name) - 1); |
470 | vol_args.name[sizeof(vol_args.name) - 1] = '\0'; | |
7c673cae FG |
471 | |
472 | int ret = ::ioctl(get_basedir_fd(), BTRFS_IOC_SNAP_DESTROY, &vol_args); | |
473 | if (ret) { | |
474 | ret = -errno; | |
475 | dout(0) << "destroy_checkpoint: ioctl SNAP_DESTROY got " << cpp_strerror(ret) << dendl; | |
476 | return ret; | |
477 | } | |
478 | return 0; | |
479 | } | |
480 | ||
481 | int BtrfsFileStoreBackend::syncfs() | |
482 | { | |
483 | dout(15) << "syncfs" << dendl; | |
484 | // do a full btrfs commit | |
485 | int ret = ::ioctl(get_op_fd(), BTRFS_IOC_SYNC); | |
486 | if (ret < 0) { | |
487 | ret = -errno; | |
488 | dout(0) << "syncfs: btrfs IOC_SYNC got " << cpp_strerror(ret) << dendl; | |
489 | } | |
490 | return ret; | |
491 | } | |
492 | ||
493 | int BtrfsFileStoreBackend::clone_range(int from, int to, uint64_t srcoff, uint64_t len, uint64_t dstoff) | |
494 | { | |
495 | dout(20) << "clone_range: " << srcoff << "~" << len << " to " << dstoff << dendl; | |
496 | size_t blk_size = get_blksize(); | |
497 | if (!has_clone_range || | |
498 | srcoff % blk_size != dstoff % blk_size) { | |
499 | dout(20) << "clone_range: using copy" << dendl; | |
500 | return _copy_range(from, to, srcoff, len, dstoff); | |
501 | } | |
502 | ||
503 | int err = 0; | |
504 | int r = 0; | |
505 | ||
506 | uint64_t srcoffclone = ALIGN_UP(srcoff, blk_size); | |
507 | uint64_t dstoffclone = ALIGN_UP(dstoff, blk_size); | |
508 | if (srcoffclone >= srcoff + len) { | |
509 | dout(20) << "clone_range: using copy, extent too short to align srcoff" << dendl; | |
510 | return _copy_range(from, to, srcoff, len, dstoff); | |
511 | } | |
512 | ||
513 | uint64_t lenclone = len - (srcoffclone - srcoff); | |
514 | if (!ALIGNED(lenclone, blk_size)) { | |
515 | struct stat from_stat, to_stat; | |
516 | err = ::fstat(from, &from_stat); | |
517 | if (err) return -errno; | |
518 | err = ::fstat(to , &to_stat); | |
519 | if (err) return -errno; | |
520 | ||
521 | if (srcoff + len != (uint64_t)from_stat.st_size || | |
522 | dstoff + len < (uint64_t)to_stat.st_size) { | |
523 | // Not to the end of the file, need to align length as well | |
524 | lenclone = ALIGN_DOWN(lenclone, blk_size); | |
525 | } | |
526 | } | |
527 | if (lenclone == 0) { | |
528 | // too short | |
529 | return _copy_range(from, to, srcoff, len, dstoff); | |
530 | } | |
531 | ||
532 | dout(20) << "clone_range: cloning " << srcoffclone << "~" << lenclone | |
533 | << " to " << dstoffclone << " = " << r << dendl; | |
534 | btrfs_ioctl_clone_range_args a; | |
535 | a.src_fd = from; | |
536 | a.src_offset = srcoffclone; | |
537 | a.src_length = lenclone; | |
538 | a.dest_offset = dstoffclone; | |
539 | err = ::ioctl(to, BTRFS_IOC_CLONE_RANGE, &a); | |
540 | if (err >= 0) { | |
541 | r += err; | |
542 | } else if (errno == EINVAL) { | |
543 | // Still failed, might be compressed | |
544 | dout(20) << "clone_range: failed CLONE_RANGE call with -EINVAL, using copy" << dendl; | |
545 | return _copy_range(from, to, srcoff, len, dstoff); | |
546 | } else { | |
547 | return -errno; | |
548 | } | |
549 | ||
550 | // Take care any trimmed from front | |
551 | if (srcoffclone != srcoff) { | |
552 | err = _copy_range(from, to, srcoff, srcoffclone - srcoff, dstoff); | |
553 | if (err >= 0) { | |
554 | r += err; | |
555 | } else { | |
556 | return err; | |
557 | } | |
558 | } | |
559 | ||
560 | // Copy end | |
561 | if (srcoffclone + lenclone != srcoff + len) { | |
562 | err = _copy_range(from, to, | |
563 | srcoffclone + lenclone, | |
564 | (srcoff + len) - (srcoffclone + lenclone), | |
565 | dstoffclone + lenclone); | |
566 | if (err >= 0) { | |
567 | r += err; | |
568 | } else { | |
569 | return err; | |
570 | } | |
571 | } | |
572 | dout(20) << "clone_range: finished " << srcoff << "~" << len | |
573 | << " to " << dstoff << " = " << r << dendl; | |
574 | return r; | |
575 | } | |
576 | #endif |