/*
 * Copyright (c) 2020 iXsystems, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#include <sys/param.h>
#include <sys/dmu.h>
#include <sys/dmu_impl.h>
#include <sys/dmu_tx.h>
#include <sys/dbuf.h>
#include <sys/dnode.h>
#include <sys/zfs_context.h>
#include <sys/dmu_objset.h>
#include <sys/dmu_traverse.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_synctask.h>
#include <sys/dsl_prop.h>
#include <sys/dmu_zfetch.h>
#include <sys/zfs_ioctl.h>
#include <sys/zap.h>
#include <sys/zio_checksum.h>
#include <sys/zio_compress.h>
#include <sys/sa.h>
#include <sys/zfeature.h>
#include <sys/abd.h>
#include <sys/zfs_rlock.h>
#include <sys/racct.h>
#include <sys/vm.h>
#include <sys/zfs_znode.h>
#include <sys/zfs_vnops.h>

#include <sys/ccompat.h>

#ifndef IDX_TO_OFF
#define	IDX_TO_OFF(idx) (((vm_ooffset_t)(idx)) << PAGE_SHIFT)
#endif

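/*
 * Compatibility shims for older FreeBSD releases: pick the page-busy
 * allocation flags that exist on each version, and only take the
 * per-page lock on versions whose VM layer still expects it.
 */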
#if __FreeBSD_version < 1300051
#define	VM_ALLOC_BUSY_FLAGS	VM_ALLOC_NOBUSY
#else
#define	VM_ALLOC_BUSY_FLAGS	VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY
#endif

#if __FreeBSD_version < 1300072
#define	dmu_page_lock(m)	vm_page_lock(m)
#define	dmu_page_unlock(m)	vm_page_unlock(m)
#else
#define	dmu_page_lock(m)
#define	dmu_page_unlock(m)
#endif

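/*
 * Copy the contents of the pages in 'ma' into the object at 'offset',
 * dirtying partially covered dbufs and filling fully covered ones as
 * part of transaction 'tx'.
 */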
int
dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
    vm_page_t *ma, dmu_tx_t *tx)
{
	dmu_buf_t **dbp;
	struct sf_buf *sf;
	int numbufs, i;
	int err;

	if (size == 0)
		return (0);

	err = dmu_buf_hold_array(os, object, offset, size,
	    FALSE, FTAG, &numbufs, &dbp);
	if (err)
		return (err);

	for (i = 0; i < numbufs; i++) {
		int tocpy, copied, thiscpy;
		int bufoff;
		dmu_buf_t *db = dbp[i];
		caddr_t va;

		ASSERT3U(size, >, 0);
		ASSERT3U(db->db_size, >=, PAGESIZE);

		bufoff = offset - db->db_offset;
		tocpy = (int)MIN(db->db_size - bufoff, size);

		ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size);

		if (tocpy == db->db_size)
			dmu_buf_will_fill(db, tx, B_FALSE);
		else
			dmu_buf_will_dirty(db, tx);

		for (copied = 0; copied < tocpy; copied += PAGESIZE) {
			ASSERT3U(ptoa((*ma)->pindex), ==,
			    db->db_offset + bufoff);
			thiscpy = MIN(PAGESIZE, tocpy - copied);
			va = zfs_map_page(*ma, &sf);
			memcpy((char *)db->db_data + bufoff, va, thiscpy);
			zfs_unmap_page(sf);
			ma += 1;
			bufoff += PAGESIZE;
		}

		if (tocpy == db->db_size)
			dmu_buf_fill_done(db, tx, B_FALSE);

		offset += tocpy;
		size -= tocpy;
	}
	dmu_buf_rele_array(dbp, numbufs, FTAG);
	return (err);
}

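/*
 * Fill the pages in 'ma' with data read from the object's dbufs.  Up to
 * '*rbehind' pages before and '*rahead' pages after the requested range
 * may also be populated opportunistically from the same buffers; on
 * return both counts are updated to the number of pages actually filled.
 */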
int
dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count,
    int *rbehind, int *rahead, int last_size)
{
	struct sf_buf *sf;
	vm_object_t vmobj;
	vm_page_t m;
	dmu_buf_t **dbp;
	dmu_buf_t *db;
	caddr_t va;
	int numbufs, i;
	int bufoff, pgoff, tocpy;
	int mi, di;
	int err;

	ASSERT3U(ma[0]->pindex + count - 1, ==, ma[count - 1]->pindex);
	ASSERT3S(last_size, <=, PAGE_SIZE);

	err = dmu_buf_hold_array(os, object, IDX_TO_OFF(ma[0]->pindex),
	    IDX_TO_OFF(count - 1) + last_size, TRUE, FTAG, &numbufs, &dbp);
	if (err != 0)
		return (err);

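	/*
	 * Debug-only sanity checks: read-ahead is only expected when the
	 * last page is a full page, and when multiple buffers are held
	 * (or the first one is not at offset zero) they must all be
	 * power-of-two sized, block-aligned, and equally sized.
	 */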
#ifdef ZFS_DEBUG
	IMPLY(last_size < PAGE_SIZE, *rahead == 0);
	if (dbp[0]->db_offset != 0 || numbufs > 1) {
		for (i = 0; i < numbufs; i++) {
			ASSERT(ISP2(dbp[i]->db_size));
			ASSERT3U((dbp[i]->db_offset % dbp[i]->db_size), ==, 0);
			ASSERT3U(dbp[i]->db_size, ==, dbp[0]->db_size);
		}
	}
#endif

	vmobj = ma[0]->object;
	zfs_vmobject_wlock_12(vmobj);

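	/*
	 * Read-behind: copy data for pages preceding the requested range
	 * out of the first dbuf, skipping any page that is already valid
	 * or cannot be grabbed without sleeping.
	 */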
	db = dbp[0];
	for (i = 0; i < *rbehind; i++) {
		m = vm_page_grab_unlocked(vmobj, ma[0]->pindex - 1 - i,
		    VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_BUSY_FLAGS);
		if (m == NULL)
			break;
		if (!vm_page_none_valid(m)) {
			ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL);
			vm_page_do_sunbusy(m);
			break;
		}
		ASSERT3U(m->dirty, ==, 0);
		ASSERT(!pmap_page_is_write_mapped(m));

		ASSERT3U(db->db_size, >, PAGE_SIZE);
		bufoff = IDX_TO_OFF(m->pindex) % db->db_size;
		va = zfs_map_page(m, &sf);
		memcpy(va, (char *)db->db_data + bufoff, PAGESIZE);
		zfs_unmap_page(sf);
		vm_page_valid(m);
		dmu_page_lock(m);
		if ((m->busy_lock & VPB_BIT_WAITERS) != 0)
			vm_page_activate(m);
		else
			vm_page_deactivate(m);
		dmu_page_unlock(m);
		vm_page_do_sunbusy(m);
	}
	*rbehind = i;

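	/*
	 * Main loop: walk the requested pages and the held dbufs in
	 * lockstep, copying each dbuf's data into the pages it backs.
	 * 'pgoff' tracks progress within the current page and 'bufoff'
	 * within the current dbuf.
	 */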
	bufoff = IDX_TO_OFF(ma[0]->pindex) % db->db_size;
	pgoff = 0;
	for (mi = 0, di = 0; mi < count && di < numbufs; ) {
		if (pgoff == 0) {
			m = ma[mi];
			if (m != bogus_page) {
				vm_page_assert_xbusied(m);
				ASSERT(vm_page_none_valid(m));
				ASSERT3U(m->dirty, ==, 0);
				ASSERT(!pmap_page_is_write_mapped(m));
				va = zfs_map_page(m, &sf);
			}
		}
		if (bufoff == 0)
			db = dbp[di];

		if (m != bogus_page) {
			ASSERT3U(IDX_TO_OFF(m->pindex) + pgoff, ==,
			    db->db_offset + bufoff);
		}

		/*
		 * We do not need to clamp the copy size by the file
		 * size as the last block is zero-filled beyond the
		 * end of file anyway.
		 */
		tocpy = MIN(db->db_size - bufoff, PAGESIZE - pgoff);
		ASSERT3S(tocpy, >=, 0);
		if (m != bogus_page)
			memcpy(va + pgoff, (char *)db->db_data + bufoff, tocpy);

		pgoff += tocpy;
		ASSERT3S(pgoff, >=, 0);
		ASSERT3S(pgoff, <=, PAGESIZE);
		if (pgoff == PAGESIZE) {
			if (m != bogus_page) {
				zfs_unmap_page(sf);
				vm_page_valid(m);
			}
			ASSERT3S(mi, <, count);
			mi++;
			pgoff = 0;
		}

		bufoff += tocpy;
		ASSERT3S(bufoff, >=, 0);
		ASSERT3S(bufoff, <=, db->db_size);
		if (bufoff == db->db_size) {
			ASSERT3S(di, <, numbufs);
			di++;
			bufoff = 0;
		}
	}

#ifdef ZFS_DEBUG
	/*
	 * Three possibilities:
	 * - last requested page ends at a buffer boundary and, thus,
	 *   all pages and buffers have been iterated;
	 * - all requested pages are filled, but the last buffer
	 *   has not been exhausted;
	 *   the read-ahead is possible only in this case;
	 * - all buffers have been read, but the last page has not been
	 *   fully filled;
	 *   this is only possible if the file has only a single buffer
	 *   with a size that is not a multiple of the page size.
	 */
	if (mi == count) {
		ASSERT3S(di, >=, numbufs - 1);
		IMPLY(*rahead != 0, di == numbufs - 1);
		IMPLY(*rahead != 0, bufoff != 0);
		ASSERT0(pgoff);
	}
	if (di == numbufs) {
		ASSERT3S(mi, >=, count - 1);
		ASSERT0(*rahead);
		IMPLY(pgoff == 0, mi == count);
		if (pgoff != 0) {
			ASSERT3S(mi, ==, count - 1);
			ASSERT3U((dbp[0]->db_size & PAGE_MASK), !=, 0);
		}
	}
#endif
	if (pgoff != 0) {
		ASSERT3P(m, !=, bogus_page);
		memset(va + pgoff, 0, PAGESIZE - pgoff);
		zfs_unmap_page(sf);
		vm_page_valid(m);
	}

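	/*
	 * Read-ahead: populate pages following the requested range from
	 * the tail of the last dbuf, zero-filling past the end of the
	 * buffer and again skipping pages that are valid or unavailable.
	 */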
	for (i = 0; i < *rahead; i++) {
		m = vm_page_grab_unlocked(vmobj, ma[count - 1]->pindex + 1 + i,
		    VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_BUSY_FLAGS);
		if (m == NULL)
			break;
		if (!vm_page_none_valid(m)) {
			ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL);
			vm_page_do_sunbusy(m);
			break;
		}
		ASSERT3U(m->dirty, ==, 0);
		ASSERT(!pmap_page_is_write_mapped(m));

		ASSERT3U(db->db_size, >, PAGE_SIZE);
		bufoff = IDX_TO_OFF(m->pindex) % db->db_size;
		tocpy = MIN(db->db_size - bufoff, PAGESIZE);
		va = zfs_map_page(m, &sf);
		memcpy(va, (char *)db->db_data + bufoff, tocpy);
		if (tocpy < PAGESIZE) {
			ASSERT3S(i, ==, *rahead - 1);
			ASSERT3U((db->db_size & PAGE_MASK), !=, 0);
			memset(va + tocpy, 0, PAGESIZE - tocpy);
		}
		zfs_unmap_page(sf);
		vm_page_valid(m);
		dmu_page_lock(m);
		if ((m->busy_lock & VPB_BIT_WAITERS) != 0)
			vm_page_activate(m);
		else
			vm_page_deactivate(m);
		dmu_page_unlock(m);
		vm_page_do_sunbusy(m);
	}
	*rahead = i;
	zfs_vmobject_wunlock_12(vmobj);

	dmu_buf_rele_array(dbp, numbufs, FTAG);
	return (0);
}