]>
Commit | Line | Data |
---|---|---|
9f0a21e6 MM |
1 | /* |
2 | * Copyright (c) 2020 iXsystems, Inc. | |
3 | * All rights reserved. | |
4 | * | |
5 | * Redistribution and use in source and binary forms, with or without | |
6 | * modification, are permitted provided that the following conditions | |
7 | * are met: | |
8 | * 1. Redistributions of source code must retain the above copyright | |
9 | * notice, this list of conditions and the following disclaimer. | |
10 | * 2. Redistributions in binary form must reproduce the above copyright | |
11 | * notice, this list of conditions and the following disclaimer in the | |
12 | * documentation and/or other materials provided with the distribution. | |
13 | * | |
14 | * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND | |
15 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
16 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
17 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE | |
18 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
19 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
20 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
21 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
22 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
23 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
24 | * SUCH DAMAGE. | |
25 | * | |
26 | */ | |
27 | ||
e64cc495 MM |
28 | #include <sys/types.h> |
29 | #include <sys/param.h> | |
9f0a21e6 MM |
30 | #include <sys/dmu.h> |
31 | #include <sys/dmu_impl.h> | |
32 | #include <sys/dmu_tx.h> | |
33 | #include <sys/dbuf.h> | |
34 | #include <sys/dnode.h> | |
35 | #include <sys/zfs_context.h> | |
36 | #include <sys/dmu_objset.h> | |
37 | #include <sys/dmu_traverse.h> | |
38 | #include <sys/dsl_dataset.h> | |
39 | #include <sys/dsl_dir.h> | |
40 | #include <sys/dsl_pool.h> | |
41 | #include <sys/dsl_synctask.h> | |
42 | #include <sys/dsl_prop.h> | |
43 | #include <sys/dmu_zfetch.h> | |
44 | #include <sys/zfs_ioctl.h> | |
45 | #include <sys/zap.h> | |
46 | #include <sys/zio_checksum.h> | |
47 | #include <sys/zio_compress.h> | |
48 | #include <sys/sa.h> | |
49 | #include <sys/zfeature.h> | |
50 | #include <sys/abd.h> | |
51 | #include <sys/zfs_rlock.h> | |
52 | #include <sys/racct.h> | |
53 | #include <sys/vm.h> | |
54 | #include <sys/zfs_znode.h> | |
55 | #include <sys/zfs_vnops.h> | |
56 | ||
e64cc495 | 57 | #include <sys/ccompat.h> |
9f0a21e6 MM |
58 | |
/*
 * Convert a page index into a byte offset.  Only defined here when the
 * included headers have not already provided it.
 */
#ifndef IDX_TO_OFF
#define	IDX_TO_OFF(idx)	(((vm_ooffset_t)(idx)) << PAGE_SHIFT)
#endif

/*
 * Busy-state allocation flags OR-ed into the vm_page_grab_unlocked() calls
 * below.  The replacement list is parenthesized so that the macro behaves as
 * a single operand no matter which operators surround it at the use site.
 */
#if __FreeBSD_version < 1300051
#define	VM_ALLOC_BUSY_FLAGS	VM_ALLOC_NOBUSY
#else
#define	VM_ALLOC_BUSY_FLAGS	(VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY)
#endif

/*
 * Per-page lock wrappers.  Before __FreeBSD_version 1300072 they map to
 * vm_page_lock()/vm_page_unlock(); from that version on they expand to
 * nothing, leaving an empty statement at each call site.
 */
#if __FreeBSD_version < 1300072
#define	dmu_page_lock(m)	vm_page_lock(m)
#define	dmu_page_unlock(m)	vm_page_unlock(m)
#else
#define	dmu_page_lock(m)
#define	dmu_page_unlock(m)
#endif
77 | ||
9f0a21e6 MM |
78 | int |
79 | dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, | |
80 | vm_page_t *ma, dmu_tx_t *tx) | |
81 | { | |
82 | dmu_buf_t **dbp; | |
83 | struct sf_buf *sf; | |
84 | int numbufs, i; | |
85 | int err; | |
86 | ||
87 | if (size == 0) | |
88 | return (0); | |
89 | ||
90 | err = dmu_buf_hold_array(os, object, offset, size, | |
91 | FALSE, FTAG, &numbufs, &dbp); | |
92 | if (err) | |
93 | return (err); | |
94 | ||
95 | for (i = 0; i < numbufs; i++) { | |
96 | int tocpy, copied, thiscpy; | |
97 | int bufoff; | |
98 | dmu_buf_t *db = dbp[i]; | |
99 | caddr_t va; | |
100 | ||
e4efb709 | 101 | ASSERT3U(size, >, 0); |
9f0a21e6 MM |
102 | ASSERT3U(db->db_size, >=, PAGESIZE); |
103 | ||
104 | bufoff = offset - db->db_offset; | |
105 | tocpy = (int)MIN(db->db_size - bufoff, size); | |
106 | ||
107 | ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); | |
108 | ||
109 | if (tocpy == db->db_size) | |
9b1677fb | 110 | dmu_buf_will_fill(db, tx, B_FALSE); |
9f0a21e6 MM |
111 | else |
112 | dmu_buf_will_dirty(db, tx); | |
113 | ||
114 | for (copied = 0; copied < tocpy; copied += PAGESIZE) { | |
115 | ASSERT3U(ptoa((*ma)->pindex), ==, | |
116 | db->db_offset + bufoff); | |
117 | thiscpy = MIN(PAGESIZE, tocpy - copied); | |
118 | va = zfs_map_page(*ma, &sf); | |
861166b0 | 119 | memcpy((char *)db->db_data + bufoff, va, thiscpy); |
9f0a21e6 MM |
120 | zfs_unmap_page(sf); |
121 | ma += 1; | |
122 | bufoff += PAGESIZE; | |
123 | } | |
124 | ||
125 | if (tocpy == db->db_size) | |
9b1677fb | 126 | dmu_buf_fill_done(db, tx, B_FALSE); |
9f0a21e6 MM |
127 | |
128 | offset += tocpy; | |
129 | size -= tocpy; | |
130 | } | |
131 | dmu_buf_rele_array(dbp, numbufs, FTAG); | |
132 | return (err); | |
133 | } | |
134 | ||
135 | int | |
136 | dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count, | |
137 | int *rbehind, int *rahead, int last_size) | |
138 | { | |
139 | struct sf_buf *sf; | |
140 | vm_object_t vmobj; | |
141 | vm_page_t m; | |
142 | dmu_buf_t **dbp; | |
143 | dmu_buf_t *db; | |
144 | caddr_t va; | |
145 | int numbufs, i; | |
146 | int bufoff, pgoff, tocpy; | |
147 | int mi, di; | |
148 | int err; | |
149 | ||
150 | ASSERT3U(ma[0]->pindex + count - 1, ==, ma[count - 1]->pindex); | |
e4efb709 | 151 | ASSERT3S(last_size, <=, PAGE_SIZE); |
9f0a21e6 MM |
152 | |
153 | err = dmu_buf_hold_array(os, object, IDX_TO_OFF(ma[0]->pindex), | |
154 | IDX_TO_OFF(count - 1) + last_size, TRUE, FTAG, &numbufs, &dbp); | |
155 | if (err != 0) | |
156 | return (err); | |
157 | ||
6d8da841 | 158 | #ifdef ZFS_DEBUG |
9f0a21e6 MM |
159 | IMPLY(last_size < PAGE_SIZE, *rahead == 0); |
160 | if (dbp[0]->db_offset != 0 || numbufs > 1) { | |
161 | for (i = 0; i < numbufs; i++) { | |
162 | ASSERT(ISP2(dbp[i]->db_size)); | |
e4efb709 | 163 | ASSERT3U((dbp[i]->db_offset % dbp[i]->db_size), ==, 0); |
9f0a21e6 MM |
164 | ASSERT3U(dbp[i]->db_size, ==, dbp[0]->db_size); |
165 | } | |
166 | } | |
167 | #endif | |
168 | ||
169 | vmobj = ma[0]->object; | |
c614fd6e | 170 | zfs_vmobject_wlock_12(vmobj); |
9f0a21e6 MM |
171 | |
172 | db = dbp[0]; | |
173 | for (i = 0; i < *rbehind; i++) { | |
c614fd6e | 174 | m = vm_page_grab_unlocked(vmobj, ma[0]->pindex - 1 - i, |
9f0a21e6 MM |
175 | VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_BUSY_FLAGS); |
176 | if (m == NULL) | |
177 | break; | |
178 | if (!vm_page_none_valid(m)) { | |
179 | ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL); | |
180 | vm_page_do_sunbusy(m); | |
181 | break; | |
182 | } | |
e4efb709 | 183 | ASSERT3U(m->dirty, ==, 0); |
c614fd6e | 184 | ASSERT(!pmap_page_is_write_mapped(m)); |
9f0a21e6 | 185 | |
e4efb709 | 186 | ASSERT3U(db->db_size, >, PAGE_SIZE); |
9f0a21e6 MM |
187 | bufoff = IDX_TO_OFF(m->pindex) % db->db_size; |
188 | va = zfs_map_page(m, &sf); | |
861166b0 | 189 | memcpy(va, (char *)db->db_data + bufoff, PAGESIZE); |
9f0a21e6 MM |
190 | zfs_unmap_page(sf); |
191 | vm_page_valid(m); | |
192 | dmu_page_lock(m); | |
193 | if ((m->busy_lock & VPB_BIT_WAITERS) != 0) | |
194 | vm_page_activate(m); | |
195 | else | |
196 | vm_page_deactivate(m); | |
197 | dmu_page_unlock(m); | |
198 | vm_page_do_sunbusy(m); | |
199 | } | |
200 | *rbehind = i; | |
201 | ||
202 | bufoff = IDX_TO_OFF(ma[0]->pindex) % db->db_size; | |
203 | pgoff = 0; | |
204 | for (mi = 0, di = 0; mi < count && di < numbufs; ) { | |
205 | if (pgoff == 0) { | |
206 | m = ma[mi]; | |
207 | if (m != bogus_page) { | |
208 | vm_page_assert_xbusied(m); | |
209 | ASSERT(vm_page_none_valid(m)); | |
e4efb709 | 210 | ASSERT3U(m->dirty, ==, 0); |
c614fd6e | 211 | ASSERT(!pmap_page_is_write_mapped(m)); |
9f0a21e6 MM |
212 | va = zfs_map_page(m, &sf); |
213 | } | |
214 | } | |
215 | if (bufoff == 0) | |
216 | db = dbp[di]; | |
217 | ||
218 | if (m != bogus_page) { | |
219 | ASSERT3U(IDX_TO_OFF(m->pindex) + pgoff, ==, | |
220 | db->db_offset + bufoff); | |
221 | } | |
222 | ||
223 | /* | |
224 | * We do not need to clamp the copy size by the file | |
225 | * size as the last block is zero-filled beyond the | |
226 | * end of file anyway. | |
227 | */ | |
228 | tocpy = MIN(db->db_size - bufoff, PAGESIZE - pgoff); | |
e4efb709 | 229 | ASSERT3S(tocpy, >=, 0); |
9f0a21e6 | 230 | if (m != bogus_page) |
861166b0 | 231 | memcpy(va + pgoff, (char *)db->db_data + bufoff, tocpy); |
9f0a21e6 MM |
232 | |
233 | pgoff += tocpy; | |
e4efb709 RM |
234 | ASSERT3S(pgoff, >=, 0); |
235 | ASSERT3S(pgoff, <=, PAGESIZE); | |
9f0a21e6 MM |
236 | if (pgoff == PAGESIZE) { |
237 | if (m != bogus_page) { | |
238 | zfs_unmap_page(sf); | |
239 | vm_page_valid(m); | |
240 | } | |
e4efb709 | 241 | ASSERT3S(mi, <, count); |
9f0a21e6 MM |
242 | mi++; |
243 | pgoff = 0; | |
244 | } | |
245 | ||
246 | bufoff += tocpy; | |
e4efb709 RM |
247 | ASSERT3S(bufoff, >=, 0); |
248 | ASSERT3S(bufoff, <=, db->db_size); | |
9f0a21e6 | 249 | if (bufoff == db->db_size) { |
e4efb709 | 250 | ASSERT3S(di, <, numbufs); |
9f0a21e6 MM |
251 | di++; |
252 | bufoff = 0; | |
253 | } | |
254 | } | |
255 | ||
6d8da841 | 256 | #ifdef ZFS_DEBUG |
9f0a21e6 MM |
257 | /* |
258 | * Three possibilities: | |
259 | * - last requested page ends at a buffer boundary and , thus, | |
260 | * all pages and buffers have been iterated; | |
261 | * - all requested pages are filled, but the last buffer | |
262 | * has not been exhausted; | |
263 | * the read-ahead is possible only in this case; | |
264 | * - all buffers have been read, but the last page has not been | |
265 | * fully filled; | |
266 | * this is only possible if the file has only a single buffer | |
267 | * with a size that is not a multiple of the page size. | |
268 | */ | |
269 | if (mi == count) { | |
e4efb709 | 270 | ASSERT3S(di, >=, numbufs - 1); |
9f0a21e6 MM |
271 | IMPLY(*rahead != 0, di == numbufs - 1); |
272 | IMPLY(*rahead != 0, bufoff != 0); | |
e4efb709 | 273 | ASSERT0(pgoff); |
9f0a21e6 MM |
274 | } |
275 | if (di == numbufs) { | |
e4efb709 RM |
276 | ASSERT3S(mi, >=, count - 1); |
277 | ASSERT0(*rahead); | |
9f0a21e6 MM |
278 | IMPLY(pgoff == 0, mi == count); |
279 | if (pgoff != 0) { | |
e4efb709 RM |
280 | ASSERT3S(mi, ==, count - 1); |
281 | ASSERT3U((dbp[0]->db_size & PAGE_MASK), !=, 0); | |
9f0a21e6 MM |
282 | } |
283 | } | |
284 | #endif | |
285 | if (pgoff != 0) { | |
e4efb709 | 286 | ASSERT3P(m, !=, bogus_page); |
861166b0 | 287 | memset(va + pgoff, 0, PAGESIZE - pgoff); |
9f0a21e6 MM |
288 | zfs_unmap_page(sf); |
289 | vm_page_valid(m); | |
290 | } | |
291 | ||
292 | for (i = 0; i < *rahead; i++) { | |
c614fd6e | 293 | m = vm_page_grab_unlocked(vmobj, ma[count - 1]->pindex + 1 + i, |
9f0a21e6 MM |
294 | VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_BUSY_FLAGS); |
295 | if (m == NULL) | |
296 | break; | |
297 | if (!vm_page_none_valid(m)) { | |
298 | ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL); | |
299 | vm_page_do_sunbusy(m); | |
300 | break; | |
301 | } | |
e4efb709 | 302 | ASSERT3U(m->dirty, ==, 0); |
2e160dee | 303 | ASSERT(!pmap_page_is_write_mapped(m)); |
9f0a21e6 | 304 | |
e4efb709 | 305 | ASSERT3U(db->db_size, >, PAGE_SIZE); |
9f0a21e6 MM |
306 | bufoff = IDX_TO_OFF(m->pindex) % db->db_size; |
307 | tocpy = MIN(db->db_size - bufoff, PAGESIZE); | |
308 | va = zfs_map_page(m, &sf); | |
861166b0 | 309 | memcpy(va, (char *)db->db_data + bufoff, tocpy); |
9f0a21e6 | 310 | if (tocpy < PAGESIZE) { |
e4efb709 RM |
311 | ASSERT3S(i, ==, *rahead - 1); |
312 | ASSERT3U((db->db_size & PAGE_MASK), !=, 0); | |
861166b0 | 313 | memset(va + tocpy, 0, PAGESIZE - tocpy); |
9f0a21e6 MM |
314 | } |
315 | zfs_unmap_page(sf); | |
316 | vm_page_valid(m); | |
317 | dmu_page_lock(m); | |
318 | if ((m->busy_lock & VPB_BIT_WAITERS) != 0) | |
319 | vm_page_activate(m); | |
320 | else | |
321 | vm_page_deactivate(m); | |
322 | dmu_page_unlock(m); | |
323 | vm_page_do_sunbusy(m); | |
324 | } | |
325 | *rahead = i; | |
c614fd6e | 326 | zfs_vmobject_wunlock_12(vmobj); |
9f0a21e6 MM |
327 | |
328 | dmu_buf_rele_array(dbp, numbufs, FTAG); | |
329 | return (0); | |
330 | } |