]>
Commit | Line | Data |
---|---|---|
9f0a21e6 MM |
1 | /* |
2 | * Copyright (c) 2020 iXsystems, Inc. | |
3 | * All rights reserved. | |
4 | * | |
5 | * Redistribution and use in source and binary forms, with or without | |
6 | * modification, are permitted provided that the following conditions | |
7 | * are met: | |
8 | * 1. Redistributions of source code must retain the above copyright | |
9 | * notice, this list of conditions and the following disclaimer. | |
10 | * 2. Redistributions in binary form must reproduce the above copyright | |
11 | * notice, this list of conditions and the following disclaimer in the | |
12 | * documentation and/or other materials provided with the distribution. | |
13 | * | |
14 | * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND | |
15 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
16 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
17 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE | |
18 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
19 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
20 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
21 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
22 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
23 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
24 | * SUCH DAMAGE. | |
25 | * | |
26 | */ | |
27 | ||
28 | #include <sys/cdefs.h> | |
29 | __FBSDID("$FreeBSD$"); | |
30 | ||
e64cc495 MM |
31 | #include <sys/types.h> |
32 | #include <sys/param.h> | |
9f0a21e6 MM |
33 | #include <sys/dmu.h> |
34 | #include <sys/dmu_impl.h> | |
35 | #include <sys/dmu_tx.h> | |
36 | #include <sys/dbuf.h> | |
37 | #include <sys/dnode.h> | |
38 | #include <sys/zfs_context.h> | |
39 | #include <sys/dmu_objset.h> | |
40 | #include <sys/dmu_traverse.h> | |
41 | #include <sys/dsl_dataset.h> | |
42 | #include <sys/dsl_dir.h> | |
43 | #include <sys/dsl_pool.h> | |
44 | #include <sys/dsl_synctask.h> | |
45 | #include <sys/dsl_prop.h> | |
46 | #include <sys/dmu_zfetch.h> | |
47 | #include <sys/zfs_ioctl.h> | |
48 | #include <sys/zap.h> | |
49 | #include <sys/zio_checksum.h> | |
50 | #include <sys/zio_compress.h> | |
51 | #include <sys/sa.h> | |
52 | #include <sys/zfeature.h> | |
53 | #include <sys/abd.h> | |
54 | #include <sys/zfs_rlock.h> | |
55 | #include <sys/racct.h> | |
56 | #include <sys/vm.h> | |
57 | #include <sys/zfs_znode.h> | |
58 | #include <sys/zfs_vnops.h> | |
59 | ||
e64cc495 | 60 | #include <sys/ccompat.h> |
9f0a21e6 MM |
61 | |
/* Convert a page index into a byte offset, unless a header already did. */
#ifndef IDX_TO_OFF
#define IDX_TO_OFF(idx) (((vm_ooffset_t)(idx)) << PAGE_SHIFT)
#endif

/*
 * Busy-state flags used when grabbing read-behind/read-ahead pages.
 * The flag set depends on the kernel version.  The replacement list is
 * parenthesized so the macro stays a single operand wherever it is used.
 */
#if __FreeBSD_version < 1300051
#define VM_ALLOC_BUSY_FLAGS (VM_ALLOC_NOBUSY)
#else
#define VM_ALLOC_BUSY_FLAGS (VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY)
#endif


/*
 * Older kernels take the per-page lock around page queue operations;
 * on newer kernels these wrappers expand to nothing.
 */
#if __FreeBSD_version < 1300072
#define dmu_page_lock(m) vm_page_lock(m)
#define dmu_page_unlock(m) vm_page_unlock(m)
#else
#define dmu_page_lock(m)
#define dmu_page_unlock(m)
#endif
80 | ||
81 | static int | |
82 | dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset, | |
83 | uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp) | |
84 | { | |
85 | dnode_t *dn; | |
86 | int err; | |
87 | ||
88 | err = dnode_hold(os, object, FTAG, &dn); | |
89 | if (err) | |
90 | return (err); | |
91 | ||
92 | err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag, | |
93 | numbufsp, dbpp, DMU_READ_PREFETCH); | |
94 | ||
95 | dnode_rele(dn, FTAG); | |
96 | ||
97 | return (err); | |
98 | } | |
99 | ||
100 | int | |
101 | dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, | |
102 | vm_page_t *ma, dmu_tx_t *tx) | |
103 | { | |
104 | dmu_buf_t **dbp; | |
105 | struct sf_buf *sf; | |
106 | int numbufs, i; | |
107 | int err; | |
108 | ||
109 | if (size == 0) | |
110 | return (0); | |
111 | ||
112 | err = dmu_buf_hold_array(os, object, offset, size, | |
113 | FALSE, FTAG, &numbufs, &dbp); | |
114 | if (err) | |
115 | return (err); | |
116 | ||
117 | for (i = 0; i < numbufs; i++) { | |
118 | int tocpy, copied, thiscpy; | |
119 | int bufoff; | |
120 | dmu_buf_t *db = dbp[i]; | |
121 | caddr_t va; | |
122 | ||
123 | ASSERT(size > 0); | |
124 | ASSERT3U(db->db_size, >=, PAGESIZE); | |
125 | ||
126 | bufoff = offset - db->db_offset; | |
127 | tocpy = (int)MIN(db->db_size - bufoff, size); | |
128 | ||
129 | ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); | |
130 | ||
131 | if (tocpy == db->db_size) | |
132 | dmu_buf_will_fill(db, tx); | |
133 | else | |
134 | dmu_buf_will_dirty(db, tx); | |
135 | ||
136 | for (copied = 0; copied < tocpy; copied += PAGESIZE) { | |
137 | ASSERT3U(ptoa((*ma)->pindex), ==, | |
138 | db->db_offset + bufoff); | |
139 | thiscpy = MIN(PAGESIZE, tocpy - copied); | |
140 | va = zfs_map_page(*ma, &sf); | |
141 | bcopy(va, (char *)db->db_data + bufoff, thiscpy); | |
142 | zfs_unmap_page(sf); | |
143 | ma += 1; | |
144 | bufoff += PAGESIZE; | |
145 | } | |
146 | ||
147 | if (tocpy == db->db_size) | |
148 | dmu_buf_fill_done(db, tx); | |
149 | ||
150 | offset += tocpy; | |
151 | size -= tocpy; | |
152 | } | |
153 | dmu_buf_rele_array(dbp, numbufs, FTAG); | |
154 | return (err); | |
155 | } | |
156 | ||
157 | int | |
158 | dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count, | |
159 | int *rbehind, int *rahead, int last_size) | |
160 | { | |
161 | struct sf_buf *sf; | |
162 | vm_object_t vmobj; | |
163 | vm_page_t m; | |
164 | dmu_buf_t **dbp; | |
165 | dmu_buf_t *db; | |
166 | caddr_t va; | |
167 | int numbufs, i; | |
168 | int bufoff, pgoff, tocpy; | |
169 | int mi, di; | |
170 | int err; | |
171 | ||
172 | ASSERT3U(ma[0]->pindex + count - 1, ==, ma[count - 1]->pindex); | |
173 | ASSERT(last_size <= PAGE_SIZE); | |
174 | ||
175 | err = dmu_buf_hold_array(os, object, IDX_TO_OFF(ma[0]->pindex), | |
176 | IDX_TO_OFF(count - 1) + last_size, TRUE, FTAG, &numbufs, &dbp); | |
177 | if (err != 0) | |
178 | return (err); | |
179 | ||
6d8da841 | 180 | #ifdef ZFS_DEBUG |
9f0a21e6 MM |
181 | IMPLY(last_size < PAGE_SIZE, *rahead == 0); |
182 | if (dbp[0]->db_offset != 0 || numbufs > 1) { | |
183 | for (i = 0; i < numbufs; i++) { | |
184 | ASSERT(ISP2(dbp[i]->db_size)); | |
185 | ASSERT((dbp[i]->db_offset % dbp[i]->db_size) == 0); | |
186 | ASSERT3U(dbp[i]->db_size, ==, dbp[0]->db_size); | |
187 | } | |
188 | } | |
189 | #endif | |
190 | ||
191 | vmobj = ma[0]->object; | |
c614fd6e | 192 | zfs_vmobject_wlock_12(vmobj); |
9f0a21e6 MM |
193 | |
194 | db = dbp[0]; | |
195 | for (i = 0; i < *rbehind; i++) { | |
c614fd6e | 196 | m = vm_page_grab_unlocked(vmobj, ma[0]->pindex - 1 - i, |
9f0a21e6 MM |
197 | VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_BUSY_FLAGS); |
198 | if (m == NULL) | |
199 | break; | |
200 | if (!vm_page_none_valid(m)) { | |
201 | ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL); | |
202 | vm_page_do_sunbusy(m); | |
203 | break; | |
204 | } | |
205 | ASSERT(m->dirty == 0); | |
c614fd6e | 206 | ASSERT(!pmap_page_is_write_mapped(m)); |
9f0a21e6 MM |
207 | |
208 | ASSERT(db->db_size > PAGE_SIZE); | |
209 | bufoff = IDX_TO_OFF(m->pindex) % db->db_size; | |
210 | va = zfs_map_page(m, &sf); | |
211 | bcopy((char *)db->db_data + bufoff, va, PAGESIZE); | |
212 | zfs_unmap_page(sf); | |
213 | vm_page_valid(m); | |
214 | dmu_page_lock(m); | |
215 | if ((m->busy_lock & VPB_BIT_WAITERS) != 0) | |
216 | vm_page_activate(m); | |
217 | else | |
218 | vm_page_deactivate(m); | |
219 | dmu_page_unlock(m); | |
220 | vm_page_do_sunbusy(m); | |
221 | } | |
222 | *rbehind = i; | |
223 | ||
224 | bufoff = IDX_TO_OFF(ma[0]->pindex) % db->db_size; | |
225 | pgoff = 0; | |
226 | for (mi = 0, di = 0; mi < count && di < numbufs; ) { | |
227 | if (pgoff == 0) { | |
228 | m = ma[mi]; | |
229 | if (m != bogus_page) { | |
230 | vm_page_assert_xbusied(m); | |
231 | ASSERT(vm_page_none_valid(m)); | |
232 | ASSERT(m->dirty == 0); | |
c614fd6e | 233 | ASSERT(!pmap_page_is_write_mapped(m)); |
9f0a21e6 MM |
234 | va = zfs_map_page(m, &sf); |
235 | } | |
236 | } | |
237 | if (bufoff == 0) | |
238 | db = dbp[di]; | |
239 | ||
240 | if (m != bogus_page) { | |
241 | ASSERT3U(IDX_TO_OFF(m->pindex) + pgoff, ==, | |
242 | db->db_offset + bufoff); | |
243 | } | |
244 | ||
245 | /* | |
246 | * We do not need to clamp the copy size by the file | |
247 | * size as the last block is zero-filled beyond the | |
248 | * end of file anyway. | |
249 | */ | |
250 | tocpy = MIN(db->db_size - bufoff, PAGESIZE - pgoff); | |
251 | if (m != bogus_page) | |
252 | bcopy((char *)db->db_data + bufoff, va + pgoff, tocpy); | |
253 | ||
254 | pgoff += tocpy; | |
255 | ASSERT(pgoff <= PAGESIZE); | |
256 | if (pgoff == PAGESIZE) { | |
257 | if (m != bogus_page) { | |
258 | zfs_unmap_page(sf); | |
259 | vm_page_valid(m); | |
260 | } | |
261 | ASSERT(mi < count); | |
262 | mi++; | |
263 | pgoff = 0; | |
264 | } | |
265 | ||
266 | bufoff += tocpy; | |
267 | ASSERT(bufoff <= db->db_size); | |
268 | if (bufoff == db->db_size) { | |
269 | ASSERT(di < numbufs); | |
270 | di++; | |
271 | bufoff = 0; | |
272 | } | |
273 | } | |
274 | ||
6d8da841 | 275 | #ifdef ZFS_DEBUG |
9f0a21e6 MM |
276 | /* |
277 | * Three possibilities: | |
278 | * - last requested page ends at a buffer boundary and , thus, | |
279 | * all pages and buffers have been iterated; | |
280 | * - all requested pages are filled, but the last buffer | |
281 | * has not been exhausted; | |
282 | * the read-ahead is possible only in this case; | |
283 | * - all buffers have been read, but the last page has not been | |
284 | * fully filled; | |
285 | * this is only possible if the file has only a single buffer | |
286 | * with a size that is not a multiple of the page size. | |
287 | */ | |
288 | if (mi == count) { | |
289 | ASSERT(di >= numbufs - 1); | |
290 | IMPLY(*rahead != 0, di == numbufs - 1); | |
291 | IMPLY(*rahead != 0, bufoff != 0); | |
292 | ASSERT(pgoff == 0); | |
293 | } | |
294 | if (di == numbufs) { | |
295 | ASSERT(mi >= count - 1); | |
296 | ASSERT(*rahead == 0); | |
297 | IMPLY(pgoff == 0, mi == count); | |
298 | if (pgoff != 0) { | |
299 | ASSERT(mi == count - 1); | |
300 | ASSERT((dbp[0]->db_size & PAGE_MASK) != 0); | |
301 | } | |
302 | } | |
303 | #endif | |
304 | if (pgoff != 0) { | |
305 | ASSERT(m != bogus_page); | |
306 | bzero(va + pgoff, PAGESIZE - pgoff); | |
307 | zfs_unmap_page(sf); | |
308 | vm_page_valid(m); | |
309 | } | |
310 | ||
311 | for (i = 0; i < *rahead; i++) { | |
c614fd6e | 312 | m = vm_page_grab_unlocked(vmobj, ma[count - 1]->pindex + 1 + i, |
9f0a21e6 MM |
313 | VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_BUSY_FLAGS); |
314 | if (m == NULL) | |
315 | break; | |
316 | if (!vm_page_none_valid(m)) { | |
317 | ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL); | |
318 | vm_page_do_sunbusy(m); | |
319 | break; | |
320 | } | |
321 | ASSERT(m->dirty == 0); | |
322 | ASSERT(!pmap_page_is_mapped(m)); | |
323 | ||
324 | ASSERT(db->db_size > PAGE_SIZE); | |
325 | bufoff = IDX_TO_OFF(m->pindex) % db->db_size; | |
326 | tocpy = MIN(db->db_size - bufoff, PAGESIZE); | |
327 | va = zfs_map_page(m, &sf); | |
328 | bcopy((char *)db->db_data + bufoff, va, tocpy); | |
329 | if (tocpy < PAGESIZE) { | |
330 | ASSERT(i == *rahead - 1); | |
331 | ASSERT((db->db_size & PAGE_MASK) != 0); | |
332 | bzero(va + tocpy, PAGESIZE - tocpy); | |
333 | } | |
334 | zfs_unmap_page(sf); | |
335 | vm_page_valid(m); | |
336 | dmu_page_lock(m); | |
337 | if ((m->busy_lock & VPB_BIT_WAITERS) != 0) | |
338 | vm_page_activate(m); | |
339 | else | |
340 | vm_page_deactivate(m); | |
341 | dmu_page_unlock(m); | |
342 | vm_page_do_sunbusy(m); | |
343 | } | |
344 | *rahead = i; | |
c614fd6e | 345 | zfs_vmobject_wunlock_12(vmobj); |
9f0a21e6 MM |
346 | |
347 | dmu_buf_rele_array(dbp, numbufs, FTAG); | |
348 | return (0); | |
349 | } |