]>
Commit | Line | Data |
---|---|---|
34dc7c2f BB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
1d3ba0bf | 9 | * or https://opensource.org/licenses/CDDL-1.0. |
34dc7c2f BB |
10 | * See the License for the specific language governing permissions |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | ||
22 | /* | |
428870ff | 23 | * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. |
37f03da8 | 24 | * Copyright (c) 2011, 2018 by Delphix. All rights reserved. |
12fa0466 DE |
25 | * Copyright (c) 2014 Integros [integros.com] |
26 | * Copyright 2017 Joyent, Inc. | |
34dc7c2f BB |
27 | */ |
28 | ||
34dc7c2f BB |
29 | #include <sys/spa.h> |
30 | #include <sys/spa_impl.h> | |
31 | #include <sys/zap.h> | |
32 | #include <sys/dsl_synctask.h> | |
33 | #include <sys/dmu_tx.h> | |
34 | #include <sys/dmu_objset.h> | |
6f1ffb06 MA |
35 | #include <sys/dsl_dataset.h> |
36 | #include <sys/dsl_dir.h> | |
34dc7c2f BB |
37 | #include <sys/cmn_err.h> |
38 | #include <sys/sunddi.h> | |
6f1ffb06 | 39 | #include <sys/cred.h> |
428870ff | 40 | #include "zfs_comutil.h" |
4cbde2ec | 41 | #include "zfs_gitrev.h" |
34dc7c2f BB |
42 | #ifdef _KERNEL |
43 | #include <sys/zone.h> | |
44 | #endif | |
45 | ||
46 | /* | |
47 | * Routines to manage the on-disk history log. | |
48 | * | |
49 | * The history log is stored as a dmu object containing | |
50 | * <packed record length, record nvlist> tuples. | |
51 | * | |
d5884c34 | 52 | * Where "record nvlist" is an nvlist containing uint64_ts and strings, and |
34dc7c2f BB |
53 | * "packed record length" is the packed length of the "record nvlist" stored |
54 | * as a little endian uint64_t. | |
55 | * | |
56 | * The log is implemented as a ring buffer, though the original creation | |
57 | * of the pool ('zpool create') is never overwritten. | |
58 | * | |
59 | * The history log is tracked as object 'spa_t::spa_history'. The bonus buffer | |
60 | * of 'spa_history' stores the offsets for logging/retrieving history as | |
61 | * 'spa_history_phys_t'. 'sh_pool_create_len' is the ending offset in bytes of | |
62 | * where the 'zpool create' record is stored. This allows us to never | |
63 | * overwrite the original creation of the pool. 'sh_phys_max_off' is the | |
64 | * physical ending offset in bytes of the log. This tells you the length of | |
65 | * the buffer. 'sh_eof' is the logical EOF (in bytes). Whenever a record | |
e1cfd73f | 66 | * is added, 'sh_eof' is incremented by the size of the record. |
34dc7c2f BB |
67 | * 'sh_eof' is never decremented. 'sh_bof' is the logical BOF (in bytes). |
68 | * This is where the consumer should start reading from after reading in | |
69 | * the 'zpool create' portion of the log. | |
70 | * | |
71 | * 'sh_records_lost' keeps track of how many records have been overwritten | |
72 | * and permanently lost. | |
73 | */ | |
74 | ||
75 | /* convert a logical offset to physical */ | |
76 | static uint64_t | |
77 | spa_history_log_to_phys(uint64_t log_off, spa_history_phys_t *shpp) | |
78 | { | |
79 | uint64_t phys_len; | |
80 | ||
81 | phys_len = shpp->sh_phys_max_off - shpp->sh_pool_create_len; | |
82 | return ((log_off - shpp->sh_pool_create_len) % phys_len | |
83 | + shpp->sh_pool_create_len); | |
84 | } | |
85 | ||
86 | void | |
87 | spa_history_create_obj(spa_t *spa, dmu_tx_t *tx) | |
88 | { | |
89 | dmu_buf_t *dbp; | |
90 | spa_history_phys_t *shpp; | |
91 | objset_t *mos = spa->spa_meta_objset; | |
92 | ||
d5e024cb | 93 | ASSERT0(spa->spa_history); |
34dc7c2f | 94 | spa->spa_history = dmu_object_alloc(mos, DMU_OT_SPA_HISTORY, |
f1512ee6 | 95 | SPA_OLD_MAXBLOCKSIZE, DMU_OT_SPA_HISTORY_OFFSETS, |
34dc7c2f BB |
96 | sizeof (spa_history_phys_t), tx); |
97 | ||
d5e024cb | 98 | VERIFY0(zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, |
34dc7c2f | 99 | DMU_POOL_HISTORY, sizeof (uint64_t), 1, |
d5e024cb | 100 | &spa->spa_history, tx)); |
34dc7c2f | 101 | |
d5e024cb BB |
102 | VERIFY0(dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp)); |
103 | ASSERT3U(dbp->db_size, >=, sizeof (spa_history_phys_t)); | |
34dc7c2f BB |
104 | |
105 | shpp = dbp->db_data; | |
106 | dmu_buf_will_dirty(dbp, tx); | |
107 | ||
108 | /* | |
109 | * Figure out maximum size of history log. We set it at | |
330d06f9 | 110 | * 0.1% of pool size, with a max of 1G and min of 128KB. |
34dc7c2f | 111 | */ |
428870ff | 112 | shpp->sh_phys_max_off = |
330d06f9 MA |
113 | metaslab_class_get_dspace(spa_normal_class(spa)) / 1000; |
114 | shpp->sh_phys_max_off = MIN(shpp->sh_phys_max_off, 1<<30); | |
34dc7c2f BB |
115 | shpp->sh_phys_max_off = MAX(shpp->sh_phys_max_off, 128<<10); |
116 | ||
117 | dmu_buf_rele(dbp, FTAG); | |
118 | } | |
119 | ||
120 | /* | |
121 | * Change 'sh_bof' to the beginning of the next record. | |
122 | */ | |
123 | static int | |
124 | spa_history_advance_bof(spa_t *spa, spa_history_phys_t *shpp) | |
125 | { | |
126 | objset_t *mos = spa->spa_meta_objset; | |
127 | uint64_t firstread, reclen, phys_bof; | |
128 | char buf[sizeof (reclen)]; | |
129 | int err; | |
130 | ||
131 | phys_bof = spa_history_log_to_phys(shpp->sh_bof, shpp); | |
132 | firstread = MIN(sizeof (reclen), shpp->sh_phys_max_off - phys_bof); | |
133 | ||
134 | if ((err = dmu_read(mos, spa->spa_history, phys_bof, firstread, | |
9babb374 | 135 | buf, DMU_READ_PREFETCH)) != 0) |
34dc7c2f BB |
136 | return (err); |
137 | if (firstread != sizeof (reclen)) { | |
138 | if ((err = dmu_read(mos, spa->spa_history, | |
139 | shpp->sh_pool_create_len, sizeof (reclen) - firstread, | |
9babb374 | 140 | buf + firstread, DMU_READ_PREFETCH)) != 0) |
34dc7c2f BB |
141 | return (err); |
142 | } | |
143 | ||
144 | reclen = LE_64(*((uint64_t *)buf)); | |
145 | shpp->sh_bof += reclen + sizeof (reclen); | |
146 | shpp->sh_records_lost++; | |
147 | return (0); | |
148 | } | |
149 | ||
150 | static int | |
151 | spa_history_write(spa_t *spa, void *buf, uint64_t len, spa_history_phys_t *shpp, | |
152 | dmu_tx_t *tx) | |
153 | { | |
154 | uint64_t firstwrite, phys_eof; | |
155 | objset_t *mos = spa->spa_meta_objset; | |
156 | int err; | |
157 | ||
158 | ASSERT(MUTEX_HELD(&spa->spa_history_lock)); | |
159 | ||
160 | /* see if we need to reset logical BOF */ | |
161 | while (shpp->sh_phys_max_off - shpp->sh_pool_create_len - | |
162 | (shpp->sh_eof - shpp->sh_bof) <= len) { | |
163 | if ((err = spa_history_advance_bof(spa, shpp)) != 0) { | |
164 | return (err); | |
165 | } | |
166 | } | |
167 | ||
168 | phys_eof = spa_history_log_to_phys(shpp->sh_eof, shpp); | |
169 | firstwrite = MIN(len, shpp->sh_phys_max_off - phys_eof); | |
170 | shpp->sh_eof += len; | |
171 | dmu_write(mos, spa->spa_history, phys_eof, firstwrite, buf, tx); | |
172 | ||
173 | len -= firstwrite; | |
174 | if (len > 0) { | |
175 | /* write out the rest at the beginning of physical file */ | |
176 | dmu_write(mos, spa->spa_history, shpp->sh_pool_create_len, | |
177 | len, (char *)buf + firstwrite, tx); | |
178 | } | |
179 | ||
180 | return (0); | |
181 | } | |
182 | ||
12fa0466 DE |
183 | /* |
184 | * Post a history sysevent. | |
185 | * | |
186 | * The nvlist_t* passed into this function will be transformed into a new | |
187 | * nvlist where: | |
188 | * | |
189 | * 1. Nested nvlists will be flattened to a single level | |
190 | * 2. Keys will have their names normalized (to remove any problematic | |
191 | * characters, such as whitespace) | |
192 | * | |
193 | * The nvlist_t passed into this function will duplicated and should be freed | |
194 | * by caller. | |
195 | * | |
196 | */ | |
197 | static void | |
198 | spa_history_log_notify(spa_t *spa, nvlist_t *nvl) | |
199 | { | |
200 | nvlist_t *hist_nvl = fnvlist_alloc(); | |
201 | uint64_t uint64; | |
d1807f16 | 202 | const char *string; |
12fa0466 DE |
203 | |
204 | if (nvlist_lookup_string(nvl, ZPOOL_HIST_CMD, &string) == 0) | |
205 | fnvlist_add_string(hist_nvl, ZFS_EV_HIST_CMD, string); | |
206 | ||
207 | if (nvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME, &string) == 0) | |
208 | fnvlist_add_string(hist_nvl, ZFS_EV_HIST_INT_NAME, string); | |
209 | ||
210 | if (nvlist_lookup_string(nvl, ZPOOL_HIST_ZONE, &string) == 0) | |
211 | fnvlist_add_string(hist_nvl, ZFS_EV_HIST_ZONE, string); | |
212 | ||
213 | if (nvlist_lookup_string(nvl, ZPOOL_HIST_HOST, &string) == 0) | |
214 | fnvlist_add_string(hist_nvl, ZFS_EV_HIST_HOST, string); | |
215 | ||
216 | if (nvlist_lookup_string(nvl, ZPOOL_HIST_DSNAME, &string) == 0) | |
217 | fnvlist_add_string(hist_nvl, ZFS_EV_HIST_DSNAME, string); | |
218 | ||
219 | if (nvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR, &string) == 0) | |
220 | fnvlist_add_string(hist_nvl, ZFS_EV_HIST_INT_STR, string); | |
221 | ||
222 | if (nvlist_lookup_string(nvl, ZPOOL_HIST_IOCTL, &string) == 0) | |
223 | fnvlist_add_string(hist_nvl, ZFS_EV_HIST_IOCTL, string); | |
224 | ||
225 | if (nvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME, &string) == 0) | |
226 | fnvlist_add_string(hist_nvl, ZFS_EV_HIST_INT_NAME, string); | |
227 | ||
228 | if (nvlist_lookup_uint64(nvl, ZPOOL_HIST_DSID, &uint64) == 0) | |
229 | fnvlist_add_uint64(hist_nvl, ZFS_EV_HIST_DSID, uint64); | |
230 | ||
231 | if (nvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG, &uint64) == 0) | |
232 | fnvlist_add_uint64(hist_nvl, ZFS_EV_HIST_TXG, uint64); | |
233 | ||
234 | if (nvlist_lookup_uint64(nvl, ZPOOL_HIST_TIME, &uint64) == 0) | |
235 | fnvlist_add_uint64(hist_nvl, ZFS_EV_HIST_TIME, uint64); | |
236 | ||
237 | if (nvlist_lookup_uint64(nvl, ZPOOL_HIST_WHO, &uint64) == 0) | |
238 | fnvlist_add_uint64(hist_nvl, ZFS_EV_HIST_WHO, uint64); | |
239 | ||
240 | if (nvlist_lookup_uint64(nvl, ZPOOL_HIST_INT_EVENT, &uint64) == 0) | |
241 | fnvlist_add_uint64(hist_nvl, ZFS_EV_HIST_INT_EVENT, uint64); | |
242 | ||
243 | spa_event_notify(spa, NULL, hist_nvl, ESC_ZFS_HISTORY_EVENT); | |
244 | ||
245 | nvlist_free(hist_nvl); | |
246 | } | |
247 | ||
34dc7c2f BB |
248 | /* |
249 | * Write out a history event. | |
250 | */ | |
251 | static void | |
13fe0198 | 252 | spa_history_log_sync(void *arg, dmu_tx_t *tx) |
34dc7c2f | 253 | { |
13fe0198 MA |
254 | nvlist_t *nvl = arg; |
255 | spa_t *spa = dmu_tx_pool(tx)->dp_spa; | |
34dc7c2f BB |
256 | objset_t *mos = spa->spa_meta_objset; |
257 | dmu_buf_t *dbp; | |
258 | spa_history_phys_t *shpp; | |
259 | size_t reclen; | |
260 | uint64_t le_len; | |
34dc7c2f BB |
261 | char *record_packed = NULL; |
262 | int ret; | |
263 | ||
264 | /* | |
265 | * If we have an older pool that doesn't have a command | |
266 | * history object, create it now. | |
267 | */ | |
268 | mutex_enter(&spa->spa_history_lock); | |
269 | if (!spa->spa_history) | |
270 | spa_history_create_obj(spa, tx); | |
271 | mutex_exit(&spa->spa_history_lock); | |
272 | ||
273 | /* | |
274 | * Get the offset of where we need to write via the bonus buffer. | |
275 | * Update the offset when the write completes. | |
276 | */ | |
13fe0198 | 277 | VERIFY0(dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp)); |
34dc7c2f BB |
278 | shpp = dbp->db_data; |
279 | ||
280 | dmu_buf_will_dirty(dbp, tx); | |
281 | ||
282 | #ifdef ZFS_DEBUG | |
283 | { | |
284 | dmu_object_info_t doi; | |
285 | dmu_object_info_from_db(dbp, &doi); | |
286 | ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_SPA_HISTORY_OFFSETS); | |
287 | } | |
288 | #endif | |
289 | ||
f0e324f2 BB |
290 | fnvlist_add_string(nvl, ZPOOL_HIST_HOST, utsname()->nodename); |
291 | ||
6f1ffb06 MA |
292 | if (nvlist_exists(nvl, ZPOOL_HIST_CMD)) { |
293 | zfs_dbgmsg("command: %s", | |
294 | fnvlist_lookup_string(nvl, ZPOOL_HIST_CMD)); | |
295 | } else if (nvlist_exists(nvl, ZPOOL_HIST_INT_NAME)) { | |
296 | if (nvlist_exists(nvl, ZPOOL_HIST_DSNAME)) { | |
297 | zfs_dbgmsg("txg %lld %s %s (id %llu) %s", | |
8e739b2c RE |
298 | (longlong_t)fnvlist_lookup_uint64(nvl, |
299 | ZPOOL_HIST_TXG), | |
6f1ffb06 MA |
300 | fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME), |
301 | fnvlist_lookup_string(nvl, ZPOOL_HIST_DSNAME), | |
8e739b2c RE |
302 | (u_longlong_t)fnvlist_lookup_uint64(nvl, |
303 | ZPOOL_HIST_DSID), | |
6f1ffb06 MA |
304 | fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR)); |
305 | } else { | |
306 | zfs_dbgmsg("txg %lld %s %s", | |
8e739b2c RE |
307 | (longlong_t)fnvlist_lookup_uint64(nvl, |
308 | ZPOOL_HIST_TXG), | |
6f1ffb06 MA |
309 | fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME), |
310 | fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR)); | |
311 | } | |
12fa0466 DE |
312 | /* |
313 | * The history sysevent is posted only for internal history | |
314 | * messages to show what has happened, not how it happened. For | |
315 | * example, the following command: | |
316 | * | |
317 | * # zfs destroy -r tank/foo | |
318 | * | |
319 | * will result in one sysevent posted per dataset that is | |
320 | * destroyed as a result of the command - which could be more | |
321 | * than one event in total. By contrast, if the sysevent was | |
322 | * posted as a result of the ZPOOL_HIST_CMD key being present | |
323 | * it would result in only one sysevent being posted with the | |
324 | * full command line arguments, requiring the consumer to know | |
76d04993 | 325 | * how to parse and understand zfs(8) command invocations. |
12fa0466 DE |
326 | */ |
327 | spa_history_log_notify(spa, nvl); | |
6f1ffb06 MA |
328 | } else if (nvlist_exists(nvl, ZPOOL_HIST_IOCTL)) { |
329 | zfs_dbgmsg("ioctl %s", | |
330 | fnvlist_lookup_string(nvl, ZPOOL_HIST_IOCTL)); | |
34dc7c2f BB |
331 | } |
332 | ||
6f1ffb06 | 333 | VERIFY3U(nvlist_pack(nvl, &record_packed, &reclen, NV_ENCODE_NATIVE, |
79c76d5b | 334 | KM_SLEEP), ==, 0); |
34dc7c2f BB |
335 | |
336 | mutex_enter(&spa->spa_history_lock); | |
34dc7c2f BB |
337 | |
338 | /* write out the packed length as little endian */ | |
339 | le_len = LE_64((uint64_t)reclen); | |
340 | ret = spa_history_write(spa, &le_len, sizeof (le_len), shpp, tx); | |
341 | if (!ret) | |
342 | ret = spa_history_write(spa, record_packed, reclen, shpp, tx); | |
343 | ||
6f1ffb06 MA |
344 | /* The first command is the create, which we keep forever */ |
345 | if (ret == 0 && shpp->sh_pool_create_len == 0 && | |
346 | nvlist_exists(nvl, ZPOOL_HIST_CMD)) { | |
347 | shpp->sh_pool_create_len = shpp->sh_bof = shpp->sh_eof; | |
34dc7c2f BB |
348 | } |
349 | ||
350 | mutex_exit(&spa->spa_history_lock); | |
6f1ffb06 | 351 | fnvlist_pack_free(record_packed, reclen); |
34dc7c2f | 352 | dmu_buf_rele(dbp, FTAG); |
6f1ffb06 | 353 | fnvlist_free(nvl); |
34dc7c2f BB |
354 | } |
355 | ||
356 | /* | |
357 | * Write out a history event. | |
358 | */ | |
359 | int | |
6f1ffb06 MA |
360 | spa_history_log(spa_t *spa, const char *msg) |
361 | { | |
362 | int err; | |
79c76d5b | 363 | nvlist_t *nvl = fnvlist_alloc(); |
6f1ffb06 MA |
364 | |
365 | fnvlist_add_string(nvl, ZPOOL_HIST_CMD, msg); | |
366 | err = spa_history_log_nvl(spa, nvl); | |
367 | fnvlist_free(nvl); | |
368 | return (err); | |
369 | } | |
370 | ||
371 | int | |
372 | spa_history_log_nvl(spa_t *spa, nvlist_t *nvl) | |
34dc7c2f | 373 | { |
428870ff BB |
374 | int err = 0; |
375 | dmu_tx_t *tx; | |
b5256303 | 376 | nvlist_t *nvarg, *in_nvl = NULL; |
34dc7c2f | 377 | |
6f1ffb06 | 378 | if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY || !spa_writeable(spa)) |
2e528b49 | 379 | return (SET_ERROR(EINVAL)); |
34dc7c2f | 380 | |
b5256303 TC |
381 | err = nvlist_lookup_nvlist(nvl, ZPOOL_HIST_INPUT_NVL, &in_nvl); |
382 | if (err == 0) { | |
383 | (void) nvlist_remove_all(in_nvl, ZPOOL_HIDDEN_ARGS); | |
384 | } | |
385 | ||
428870ff BB |
386 | tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); |
387 | err = dmu_tx_assign(tx, TXG_WAIT); | |
388 | if (err) { | |
389 | dmu_tx_abort(tx); | |
390 | return (err); | |
391 | } | |
392 | ||
79c76d5b | 393 | VERIFY0(nvlist_dup(nvl, &nvarg, KM_SLEEP)); |
6f1ffb06 MA |
394 | if (spa_history_zone() != NULL) { |
395 | fnvlist_add_string(nvarg, ZPOOL_HIST_ZONE, | |
396 | spa_history_zone()); | |
397 | } | |
398 | fnvlist_add_uint64(nvarg, ZPOOL_HIST_WHO, crgetruid(CRED())); | |
428870ff | 399 | |
2ac90457 MA |
400 | /* |
401 | * Since the history is recorded asynchronously, the effective time is | |
402 | * now, which may be considerably before the change is made on disk. | |
403 | */ | |
404 | fnvlist_add_uint64(nvarg, ZPOOL_HIST_TIME, gethrestime_sec()); | |
405 | ||
428870ff | 406 | /* Kick this off asynchronously; errors are ignored. */ |
38080324 | 407 | dsl_sync_task_nowait(spa_get_dsl(spa), spa_history_log_sync, nvarg, tx); |
428870ff BB |
408 | dmu_tx_commit(tx); |
409 | ||
6f1ffb06 | 410 | /* spa_history_log_sync will free nvl */ |
428870ff | 411 | return (err); |
34dc7c2f BB |
412 | } |
413 | ||
414 | /* | |
415 | * Read out the command history. | |
416 | */ | |
417 | int | |
418 | spa_history_get(spa_t *spa, uint64_t *offp, uint64_t *len, char *buf) | |
419 | { | |
420 | objset_t *mos = spa->spa_meta_objset; | |
421 | dmu_buf_t *dbp; | |
422 | uint64_t read_len, phys_read_off, phys_eof; | |
423 | uint64_t leftover = 0; | |
424 | spa_history_phys_t *shpp; | |
425 | int err; | |
426 | ||
427 | /* | |
6f1ffb06 | 428 | * If the command history doesn't exist (older pool), |
34dc7c2f BB |
429 | * that's ok, just return ENOENT. |
430 | */ | |
431 | if (!spa->spa_history) | |
2e528b49 | 432 | return (SET_ERROR(ENOENT)); |
34dc7c2f | 433 | |
428870ff BB |
434 | /* |
435 | * The history is logged asynchronously, so when they request | |
436 | * the first chunk of history, make sure everything has been | |
437 | * synced to disk so that we get it. | |
438 | */ | |
439 | if (*offp == 0 && spa_writeable(spa)) | |
440 | txg_wait_synced(spa_get_dsl(spa), 0); | |
441 | ||
34dc7c2f BB |
442 | if ((err = dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp)) != 0) |
443 | return (err); | |
444 | shpp = dbp->db_data; | |
445 | ||
446 | #ifdef ZFS_DEBUG | |
447 | { | |
448 | dmu_object_info_t doi; | |
449 | dmu_object_info_from_db(dbp, &doi); | |
450 | ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_SPA_HISTORY_OFFSETS); | |
451 | } | |
452 | #endif | |
453 | ||
454 | mutex_enter(&spa->spa_history_lock); | |
455 | phys_eof = spa_history_log_to_phys(shpp->sh_eof, shpp); | |
456 | ||
457 | if (*offp < shpp->sh_pool_create_len) { | |
458 | /* read in just the zpool create history */ | |
459 | phys_read_off = *offp; | |
460 | read_len = MIN(*len, shpp->sh_pool_create_len - | |
461 | phys_read_off); | |
462 | } else { | |
463 | /* | |
464 | * Need to reset passed in offset to BOF if the passed in | |
465 | * offset has since been overwritten. | |
466 | */ | |
467 | *offp = MAX(*offp, shpp->sh_bof); | |
468 | phys_read_off = spa_history_log_to_phys(*offp, shpp); | |
469 | ||
470 | /* | |
471 | * Read up to the minimum of what the user passed down or | |
472 | * the EOF (physical or logical). If we hit physical EOF, | |
473 | * use 'leftover' to read from the physical BOF. | |
474 | */ | |
475 | if (phys_read_off <= phys_eof) { | |
476 | read_len = MIN(*len, phys_eof - phys_read_off); | |
477 | } else { | |
478 | read_len = MIN(*len, | |
479 | shpp->sh_phys_max_off - phys_read_off); | |
480 | if (phys_read_off + *len > shpp->sh_phys_max_off) { | |
481 | leftover = MIN(*len - read_len, | |
482 | phys_eof - shpp->sh_pool_create_len); | |
483 | } | |
484 | } | |
485 | } | |
486 | ||
487 | /* offset for consumer to use next */ | |
488 | *offp += read_len + leftover; | |
489 | ||
490 | /* tell the consumer how much you actually read */ | |
491 | *len = read_len + leftover; | |
492 | ||
493 | if (read_len == 0) { | |
494 | mutex_exit(&spa->spa_history_lock); | |
495 | dmu_buf_rele(dbp, FTAG); | |
496 | return (0); | |
497 | } | |
498 | ||
9babb374 BB |
499 | err = dmu_read(mos, spa->spa_history, phys_read_off, read_len, buf, |
500 | DMU_READ_PREFETCH); | |
34dc7c2f BB |
501 | if (leftover && err == 0) { |
502 | err = dmu_read(mos, spa->spa_history, shpp->sh_pool_create_len, | |
9babb374 | 503 | leftover, buf + read_len, DMU_READ_PREFETCH); |
34dc7c2f BB |
504 | } |
505 | mutex_exit(&spa->spa_history_lock); | |
506 | ||
507 | dmu_buf_rele(dbp, FTAG); | |
508 | return (err); | |
509 | } | |
510 | ||
6f1ffb06 MA |
511 | /* |
512 | * The nvlist will be consumed by this call. | |
513 | */ | |
45d1cae3 | 514 | static void |
6f1ffb06 | 515 | log_internal(nvlist_t *nvl, const char *operation, spa_t *spa, |
428870ff | 516 | dmu_tx_t *tx, const char *fmt, va_list adx) |
34dc7c2f | 517 | { |
6f1ffb06 | 518 | char *msg; |
34dc7c2f | 519 | |
b128c09f BB |
520 | /* |
521 | * If this is part of creating a pool, not everything is | |
522 | * initialized yet, so don't bother logging the internal events. | |
6f1ffb06 | 523 | * Likewise if the pool is not writeable. |
b128c09f | 524 | */ |
d5e024cb | 525 | if (spa_is_initializing(spa) || !spa_writeable(spa)) { |
6f1ffb06 | 526 | fnvlist_free(nvl); |
b128c09f | 527 | return; |
6f1ffb06 | 528 | } |
b128c09f | 529 | |
841c9d43 | 530 | msg = kmem_vasprintf(fmt, adx); |
6f1ffb06 | 531 | fnvlist_add_string(nvl, ZPOOL_HIST_INT_STR, msg); |
e4f5fa12 | 532 | kmem_strfree(msg); |
6f1ffb06 MA |
533 | |
534 | fnvlist_add_string(nvl, ZPOOL_HIST_INT_NAME, operation); | |
535 | fnvlist_add_uint64(nvl, ZPOOL_HIST_TXG, tx->tx_txg); | |
2ac90457 | 536 | fnvlist_add_uint64(nvl, ZPOOL_HIST_TIME, gethrestime_sec()); |
34dc7c2f BB |
537 | |
538 | if (dmu_tx_is_syncing(tx)) { | |
13fe0198 | 539 | spa_history_log_sync(nvl, tx); |
34dc7c2f | 540 | } else { |
13fe0198 | 541 | dsl_sync_task_nowait(spa_get_dsl(spa), |
38080324 | 542 | spa_history_log_sync, nvl, tx); |
34dc7c2f | 543 | } |
6f1ffb06 | 544 | /* spa_history_log_sync() will free nvl */ |
34dc7c2f | 545 | } |
45d1cae3 BB |
546 | |
547 | void | |
6f1ffb06 | 548 | spa_history_log_internal(spa_t *spa, const char *operation, |
428870ff | 549 | dmu_tx_t *tx, const char *fmt, ...) |
45d1cae3 BB |
550 | { |
551 | dmu_tx_t *htx = tx; | |
552 | va_list adx; | |
553 | ||
554 | /* create a tx if we didn't get one */ | |
555 | if (tx == NULL) { | |
556 | htx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); | |
557 | if (dmu_tx_assign(htx, TXG_WAIT) != 0) { | |
558 | dmu_tx_abort(htx); | |
559 | return; | |
560 | } | |
561 | } | |
562 | ||
563 | va_start(adx, fmt); | |
79c76d5b | 564 | log_internal(fnvlist_alloc(), operation, spa, htx, fmt, adx); |
45d1cae3 BB |
565 | va_end(adx); |
566 | ||
567 | /* if we didn't get a tx from the caller, commit the one we made */ | |
568 | if (tx == NULL) | |
569 | dmu_tx_commit(htx); | |
570 | } | |
571 | ||
572 | void | |
6f1ffb06 MA |
573 | spa_history_log_internal_ds(dsl_dataset_t *ds, const char *operation, |
574 | dmu_tx_t *tx, const char *fmt, ...) | |
575 | { | |
576 | va_list adx; | |
eca7b760 | 577 | char namebuf[ZFS_MAX_DATASET_NAME_LEN]; |
79c76d5b | 578 | nvlist_t *nvl = fnvlist_alloc(); |
6f1ffb06 MA |
579 | |
580 | ASSERT(tx != NULL); | |
581 | ||
582 | dsl_dataset_name(ds, namebuf); | |
6f1ffb06 MA |
583 | fnvlist_add_string(nvl, ZPOOL_HIST_DSNAME, namebuf); |
584 | fnvlist_add_uint64(nvl, ZPOOL_HIST_DSID, ds->ds_object); | |
585 | ||
586 | va_start(adx, fmt); | |
587 | log_internal(nvl, operation, dsl_dataset_get_spa(ds), tx, fmt, adx); | |
588 | va_end(adx); | |
589 | } | |
590 | ||
591 | void | |
592 | spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation, | |
593 | dmu_tx_t *tx, const char *fmt, ...) | |
594 | { | |
595 | va_list adx; | |
eca7b760 | 596 | char namebuf[ZFS_MAX_DATASET_NAME_LEN]; |
79c76d5b | 597 | nvlist_t *nvl = fnvlist_alloc(); |
6f1ffb06 MA |
598 | |
599 | ASSERT(tx != NULL); | |
600 | ||
601 | dsl_dir_name(dd, namebuf); | |
6f1ffb06 MA |
602 | fnvlist_add_string(nvl, ZPOOL_HIST_DSNAME, namebuf); |
603 | fnvlist_add_uint64(nvl, ZPOOL_HIST_DSID, | |
d683ddbb | 604 | dsl_dir_phys(dd)->dd_head_dataset_obj); |
6f1ffb06 MA |
605 | |
606 | va_start(adx, fmt); | |
607 | log_internal(nvl, operation, dd->dd_pool->dp_spa, tx, fmt, adx); | |
608 | va_end(adx); | |
609 | } | |
610 | ||
611 | void | |
d5e024cb | 612 | spa_history_log_version(spa_t *spa, const char *operation, dmu_tx_t *tx) |
45d1cae3 | 613 | { |
f0e324f2 BB |
614 | utsname_t *u = utsname(); |
615 | ||
d5e024cb | 616 | spa_history_log_internal(spa, operation, tx, |
4cbde2ec MA |
617 | "pool version %llu; software version %s; uts %s %s %s %s", |
618 | (u_longlong_t)spa_version(spa), ZFS_META_GITREV, | |
f0e324f2 | 619 | u->nodename, u->release, u->version, u->machine); |
45d1cae3 | 620 | } |
c28b2279 | 621 | |
9bb907bc RM |
622 | #ifndef _KERNEL |
623 | const char * | |
624 | spa_history_zone(void) | |
625 | { | |
626 | return (NULL); | |
627 | } | |
628 | #endif | |
629 | ||
93ce2b4c | 630 | #if defined(_KERNEL) |
c28b2279 BB |
631 | EXPORT_SYMBOL(spa_history_create_obj); |
632 | EXPORT_SYMBOL(spa_history_get); | |
633 | EXPORT_SYMBOL(spa_history_log); | |
634 | EXPORT_SYMBOL(spa_history_log_internal); | |
635 | EXPORT_SYMBOL(spa_history_log_version); | |
636 | #endif |