]>
Commit | Line | Data |
---|---|---|
9f0a21e6 MM |
1 | /* |
2 | * Copyright (c) 2020 iXsystems, Inc. | |
3 | * All rights reserved. | |
4 | * | |
5 | * Redistribution and use in source and binary forms, with or without | |
6 | * modification, are permitted provided that the following conditions | |
7 | * are met: | |
8 | * 1. Redistributions of source code must retain the above copyright | |
9 | * notice, this list of conditions and the following disclaimer. | |
10 | * 2. Redistributions in binary form must reproduce the above copyright | |
11 | * notice, this list of conditions and the following disclaimer in the | |
12 | * documentation and/or other materials provided with the distribution. | |
13 | * | |
14 | * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND | |
15 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
16 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
17 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE | |
18 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
19 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
20 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
21 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
22 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
23 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
24 | * SUCH DAMAGE. | |
25 | * | |
26 | */ | |
27 | ||
28 | #include <sys/cdefs.h> | |
29 | __FBSDID("$FreeBSD$"); | |
30 | ||
31 | #include <sys/dmu.h> | |
32 | #include <sys/dmu_impl.h> | |
33 | #include <sys/dmu_tx.h> | |
34 | #include <sys/dbuf.h> | |
35 | #include <sys/dnode.h> | |
36 | #include <sys/zfs_context.h> | |
37 | #include <sys/dmu_objset.h> | |
38 | #include <sys/dmu_traverse.h> | |
39 | #include <sys/dsl_dataset.h> | |
40 | #include <sys/dsl_dir.h> | |
41 | #include <sys/dsl_pool.h> | |
42 | #include <sys/dsl_synctask.h> | |
43 | #include <sys/dsl_prop.h> | |
44 | #include <sys/dmu_zfetch.h> | |
45 | #include <sys/zfs_ioctl.h> | |
46 | #include <sys/zap.h> | |
47 | #include <sys/zio_checksum.h> | |
48 | #include <sys/zio_compress.h> | |
49 | #include <sys/sa.h> | |
50 | #include <sys/zfeature.h> | |
51 | #include <sys/abd.h> | |
52 | #include <sys/zfs_rlock.h> | |
53 | #include <sys/racct.h> | |
54 | #include <sys/vm.h> | |
55 | #include <sys/zfs_znode.h> | |
56 | #include <sys/zfs_vnops.h> | |
57 | ||
58 | ||
/*
 * Convert a page index into a byte offset.  Only defined here when the
 * including headers have not already provided it.
 */
#ifndef IDX_TO_OFF
#define	IDX_TO_OFF(idx) (((vm_ooffset_t)(idx)) << PAGE_SHIFT)
#endif

/*
 * Busy-state allocation flags used when grabbing read-behind/read-ahead
 * pages; the flag set depends on the FreeBSD version being built against.
 * The expansion is parenthesized so that the macro behaves as a single
 * operand no matter which operators surround it at the point of use.
 */
#if __FreeBSD_version < 1300051
#define	VM_ALLOC_BUSY_FLAGS (VM_ALLOC_NOBUSY)
#else
#define	VM_ALLOC_BUSY_FLAGS (VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY)
#endif

/*
 * Page lock wrappers: they map onto vm_page_lock()/vm_page_unlock() for
 * FreeBSD versions below 1300072 and expand to nothing otherwise.
 */
#if __FreeBSD_version < 1300072
#define	dmu_page_lock(m) vm_page_lock(m)
#define	dmu_page_unlock(m) vm_page_unlock(m)
#else
#define	dmu_page_lock(m)
#define	dmu_page_unlock(m)
#endif
77 | ||
78 | static int | |
79 | dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset, | |
80 | uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp) | |
81 | { | |
82 | dnode_t *dn; | |
83 | int err; | |
84 | ||
85 | err = dnode_hold(os, object, FTAG, &dn); | |
86 | if (err) | |
87 | return (err); | |
88 | ||
89 | err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag, | |
90 | numbufsp, dbpp, DMU_READ_PREFETCH); | |
91 | ||
92 | dnode_rele(dn, FTAG); | |
93 | ||
94 | return (err); | |
95 | } | |
96 | ||
97 | int | |
98 | dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, | |
99 | vm_page_t *ma, dmu_tx_t *tx) | |
100 | { | |
101 | dmu_buf_t **dbp; | |
102 | struct sf_buf *sf; | |
103 | int numbufs, i; | |
104 | int err; | |
105 | ||
106 | if (size == 0) | |
107 | return (0); | |
108 | ||
109 | err = dmu_buf_hold_array(os, object, offset, size, | |
110 | FALSE, FTAG, &numbufs, &dbp); | |
111 | if (err) | |
112 | return (err); | |
113 | ||
114 | for (i = 0; i < numbufs; i++) { | |
115 | int tocpy, copied, thiscpy; | |
116 | int bufoff; | |
117 | dmu_buf_t *db = dbp[i]; | |
118 | caddr_t va; | |
119 | ||
120 | ASSERT(size > 0); | |
121 | ASSERT3U(db->db_size, >=, PAGESIZE); | |
122 | ||
123 | bufoff = offset - db->db_offset; | |
124 | tocpy = (int)MIN(db->db_size - bufoff, size); | |
125 | ||
126 | ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); | |
127 | ||
128 | if (tocpy == db->db_size) | |
129 | dmu_buf_will_fill(db, tx); | |
130 | else | |
131 | dmu_buf_will_dirty(db, tx); | |
132 | ||
133 | for (copied = 0; copied < tocpy; copied += PAGESIZE) { | |
134 | ASSERT3U(ptoa((*ma)->pindex), ==, | |
135 | db->db_offset + bufoff); | |
136 | thiscpy = MIN(PAGESIZE, tocpy - copied); | |
137 | va = zfs_map_page(*ma, &sf); | |
138 | bcopy(va, (char *)db->db_data + bufoff, thiscpy); | |
139 | zfs_unmap_page(sf); | |
140 | ma += 1; | |
141 | bufoff += PAGESIZE; | |
142 | } | |
143 | ||
144 | if (tocpy == db->db_size) | |
145 | dmu_buf_fill_done(db, tx); | |
146 | ||
147 | offset += tocpy; | |
148 | size -= tocpy; | |
149 | } | |
150 | dmu_buf_rele_array(dbp, numbufs, FTAG); | |
151 | return (err); | |
152 | } | |
153 | ||
154 | int | |
155 | dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count, | |
156 | int *rbehind, int *rahead, int last_size) | |
157 | { | |
158 | struct sf_buf *sf; | |
159 | vm_object_t vmobj; | |
160 | vm_page_t m; | |
161 | dmu_buf_t **dbp; | |
162 | dmu_buf_t *db; | |
163 | caddr_t va; | |
164 | int numbufs, i; | |
165 | int bufoff, pgoff, tocpy; | |
166 | int mi, di; | |
167 | int err; | |
168 | ||
169 | ASSERT3U(ma[0]->pindex + count - 1, ==, ma[count - 1]->pindex); | |
170 | ASSERT(last_size <= PAGE_SIZE); | |
171 | ||
172 | err = dmu_buf_hold_array(os, object, IDX_TO_OFF(ma[0]->pindex), | |
173 | IDX_TO_OFF(count - 1) + last_size, TRUE, FTAG, &numbufs, &dbp); | |
174 | if (err != 0) | |
175 | return (err); | |
176 | ||
177 | #ifdef DEBUG | |
178 | IMPLY(last_size < PAGE_SIZE, *rahead == 0); | |
179 | if (dbp[0]->db_offset != 0 || numbufs > 1) { | |
180 | for (i = 0; i < numbufs; i++) { | |
181 | ASSERT(ISP2(dbp[i]->db_size)); | |
182 | ASSERT((dbp[i]->db_offset % dbp[i]->db_size) == 0); | |
183 | ASSERT3U(dbp[i]->db_size, ==, dbp[0]->db_size); | |
184 | } | |
185 | } | |
186 | #endif | |
187 | ||
188 | vmobj = ma[0]->object; | |
c614fd6e | 189 | zfs_vmobject_wlock_12(vmobj); |
9f0a21e6 MM |
190 | |
191 | db = dbp[0]; | |
192 | for (i = 0; i < *rbehind; i++) { | |
c614fd6e | 193 | m = vm_page_grab_unlocked(vmobj, ma[0]->pindex - 1 - i, |
9f0a21e6 MM |
194 | VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_BUSY_FLAGS); |
195 | if (m == NULL) | |
196 | break; | |
197 | if (!vm_page_none_valid(m)) { | |
198 | ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL); | |
199 | vm_page_do_sunbusy(m); | |
200 | break; | |
201 | } | |
202 | ASSERT(m->dirty == 0); | |
c614fd6e | 203 | ASSERT(!pmap_page_is_write_mapped(m)); |
9f0a21e6 MM |
204 | |
205 | ASSERT(db->db_size > PAGE_SIZE); | |
206 | bufoff = IDX_TO_OFF(m->pindex) % db->db_size; | |
207 | va = zfs_map_page(m, &sf); | |
208 | bcopy((char *)db->db_data + bufoff, va, PAGESIZE); | |
209 | zfs_unmap_page(sf); | |
210 | vm_page_valid(m); | |
211 | dmu_page_lock(m); | |
212 | if ((m->busy_lock & VPB_BIT_WAITERS) != 0) | |
213 | vm_page_activate(m); | |
214 | else | |
215 | vm_page_deactivate(m); | |
216 | dmu_page_unlock(m); | |
217 | vm_page_do_sunbusy(m); | |
218 | } | |
219 | *rbehind = i; | |
220 | ||
221 | bufoff = IDX_TO_OFF(ma[0]->pindex) % db->db_size; | |
222 | pgoff = 0; | |
223 | for (mi = 0, di = 0; mi < count && di < numbufs; ) { | |
224 | if (pgoff == 0) { | |
225 | m = ma[mi]; | |
226 | if (m != bogus_page) { | |
227 | vm_page_assert_xbusied(m); | |
228 | ASSERT(vm_page_none_valid(m)); | |
229 | ASSERT(m->dirty == 0); | |
c614fd6e | 230 | ASSERT(!pmap_page_is_write_mapped(m)); |
9f0a21e6 MM |
231 | va = zfs_map_page(m, &sf); |
232 | } | |
233 | } | |
234 | if (bufoff == 0) | |
235 | db = dbp[di]; | |
236 | ||
237 | if (m != bogus_page) { | |
238 | ASSERT3U(IDX_TO_OFF(m->pindex) + pgoff, ==, | |
239 | db->db_offset + bufoff); | |
240 | } | |
241 | ||
242 | /* | |
243 | * We do not need to clamp the copy size by the file | |
244 | * size as the last block is zero-filled beyond the | |
245 | * end of file anyway. | |
246 | */ | |
247 | tocpy = MIN(db->db_size - bufoff, PAGESIZE - pgoff); | |
248 | if (m != bogus_page) | |
249 | bcopy((char *)db->db_data + bufoff, va + pgoff, tocpy); | |
250 | ||
251 | pgoff += tocpy; | |
252 | ASSERT(pgoff <= PAGESIZE); | |
253 | if (pgoff == PAGESIZE) { | |
254 | if (m != bogus_page) { | |
255 | zfs_unmap_page(sf); | |
256 | vm_page_valid(m); | |
257 | } | |
258 | ASSERT(mi < count); | |
259 | mi++; | |
260 | pgoff = 0; | |
261 | } | |
262 | ||
263 | bufoff += tocpy; | |
264 | ASSERT(bufoff <= db->db_size); | |
265 | if (bufoff == db->db_size) { | |
266 | ASSERT(di < numbufs); | |
267 | di++; | |
268 | bufoff = 0; | |
269 | } | |
270 | } | |
271 | ||
272 | #ifdef DEBUG | |
273 | /* | |
274 | * Three possibilities: | |
275 | * - last requested page ends at a buffer boundary and , thus, | |
276 | * all pages and buffers have been iterated; | |
277 | * - all requested pages are filled, but the last buffer | |
278 | * has not been exhausted; | |
279 | * the read-ahead is possible only in this case; | |
280 | * - all buffers have been read, but the last page has not been | |
281 | * fully filled; | |
282 | * this is only possible if the file has only a single buffer | |
283 | * with a size that is not a multiple of the page size. | |
284 | */ | |
285 | if (mi == count) { | |
286 | ASSERT(di >= numbufs - 1); | |
287 | IMPLY(*rahead != 0, di == numbufs - 1); | |
288 | IMPLY(*rahead != 0, bufoff != 0); | |
289 | ASSERT(pgoff == 0); | |
290 | } | |
291 | if (di == numbufs) { | |
292 | ASSERT(mi >= count - 1); | |
293 | ASSERT(*rahead == 0); | |
294 | IMPLY(pgoff == 0, mi == count); | |
295 | if (pgoff != 0) { | |
296 | ASSERT(mi == count - 1); | |
297 | ASSERT((dbp[0]->db_size & PAGE_MASK) != 0); | |
298 | } | |
299 | } | |
300 | #endif | |
301 | if (pgoff != 0) { | |
302 | ASSERT(m != bogus_page); | |
303 | bzero(va + pgoff, PAGESIZE - pgoff); | |
304 | zfs_unmap_page(sf); | |
305 | vm_page_valid(m); | |
306 | } | |
307 | ||
308 | for (i = 0; i < *rahead; i++) { | |
c614fd6e | 309 | m = vm_page_grab_unlocked(vmobj, ma[count - 1]->pindex + 1 + i, |
9f0a21e6 MM |
310 | VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_BUSY_FLAGS); |
311 | if (m == NULL) | |
312 | break; | |
313 | if (!vm_page_none_valid(m)) { | |
314 | ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL); | |
315 | vm_page_do_sunbusy(m); | |
316 | break; | |
317 | } | |
318 | ASSERT(m->dirty == 0); | |
319 | ASSERT(!pmap_page_is_mapped(m)); | |
320 | ||
321 | ASSERT(db->db_size > PAGE_SIZE); | |
322 | bufoff = IDX_TO_OFF(m->pindex) % db->db_size; | |
323 | tocpy = MIN(db->db_size - bufoff, PAGESIZE); | |
324 | va = zfs_map_page(m, &sf); | |
325 | bcopy((char *)db->db_data + bufoff, va, tocpy); | |
326 | if (tocpy < PAGESIZE) { | |
327 | ASSERT(i == *rahead - 1); | |
328 | ASSERT((db->db_size & PAGE_MASK) != 0); | |
329 | bzero(va + tocpy, PAGESIZE - tocpy); | |
330 | } | |
331 | zfs_unmap_page(sf); | |
332 | vm_page_valid(m); | |
333 | dmu_page_lock(m); | |
334 | if ((m->busy_lock & VPB_BIT_WAITERS) != 0) | |
335 | vm_page_activate(m); | |
336 | else | |
337 | vm_page_deactivate(m); | |
338 | dmu_page_unlock(m); | |
339 | vm_page_do_sunbusy(m); | |
340 | } | |
341 | *rahead = i; | |
c614fd6e | 342 | zfs_vmobject_wunlock_12(vmobj); |
9f0a21e6 MM |
343 | |
344 | dmu_buf_rele_array(dbp, numbufs, FTAG); | |
345 | return (0); | |
346 | } |