]>
Commit | Line | Data |
---|---|---|
448b2003 | 1 | /* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2016 Nicira, Inc. |
bd06962a BP |
2 | * |
3 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
4 | * you may not use this file except in compliance with the License. | |
5 | * You may obtain a copy of the License at: | |
6 | * | |
7 | * http://www.apache.org/licenses/LICENSE-2.0 | |
8 | * | |
9 | * Unless required by applicable law or agreed to in writing, software | |
10 | * distributed under the License is distributed on an "AS IS" BASIS, | |
11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
12 | * See the License for the specific language governing permissions and | |
13 | * limitations under the License. | |
14 | */ | |
15 | ||
16 | #include <config.h> | |
17 | ||
18 | #include "file.h" | |
19 | ||
ada496b5 | 20 | #include <errno.h> |
bd06962a | 21 | #include <fcntl.h> |
ada496b5 | 22 | #include <unistd.h> |
bd06962a | 23 | |
17d18afb | 24 | #include "bitmap.h" |
bd06962a BP |
25 | #include "column.h" |
26 | #include "log.h" | |
ee89ea7b | 27 | #include "openvswitch/json.h" |
ada496b5 | 28 | #include "lockfile.h" |
bd06962a BP |
29 | #include "ovsdb.h" |
30 | #include "ovsdb-error.h" | |
31 | #include "row.h" | |
ada496b5 | 32 | #include "socket-util.h" |
bd06962a | 33 | #include "table.h" |
d171b584 | 34 | #include "timeval.h" |
bd06962a BP |
35 | #include "transaction.h" |
36 | #include "uuid.h" | |
37 | #include "util.h" | |
e6211adc | 38 | #include "openvswitch/vlog.h" |
bd06962a | 39 | |
d98e6007 | 40 | VLOG_DEFINE_THIS_MODULE(ovsdb_file); |
5136ce49 | 41 | |
ada496b5 BP |
42 | /* Minimum number of milliseconds between database compactions. */ |
43 | #define COMPACT_MIN_MSEC (10 * 60 * 1000) /* 10 minutes. */ | |
44 | ||
45 | /* Minimum number of milliseconds between trying to compact the database if | |
46 | * compacting fails. */ | |
47 | #define COMPACT_RETRY_MSEC (60 * 1000) /* 1 minute. */ | |
48 | ||
a3d573ed BP |
49 | /* A transaction being converted to JSON for writing to a file.  Reset by ovsdb_file_txn_init() and filled in one row at a time by ovsdb_file_txn_add_row(). */ |
50 | struct ovsdb_file_txn { | |
51 | struct json *json; /* JSON for the whole transaction; NULL until the first row is added. */ | |
52 | struct json *table_json; /* JSON for 'table''s transaction (stored inside 'json'). */ | |
53 | struct ovsdb_table *table; /* Table described in 'table_json'; NULL initially. */ | |
54 | }; | |
55 | ||
56 | static void ovsdb_file_txn_init(struct ovsdb_file_txn *); | |
57 | static void ovsdb_file_txn_add_row(struct ovsdb_file_txn *, | |
58 | const struct ovsdb_row *old, | |
17d18afb BP |
59 | const struct ovsdb_row *new, |
60 | const unsigned long int *changed); | |
a3d573ed BP |
61 | static struct ovsdb_error *ovsdb_file_txn_commit(struct json *, |
62 | const char *comment, | |
63 | bool durable, | |
64 | struct ovsdb_log *); | |
1e19e50e BP |
65 | |
66 | static struct ovsdb_error *ovsdb_file_open__(const char *file_name, | |
67 | const struct ovsdb_schema *, | |
ada496b5 BP |
68 | bool read_only, struct ovsdb **, |
69 | struct ovsdb_file **); | |
70 | static struct ovsdb_error *ovsdb_file_txn_from_json( | |
2958f35b | 71 | struct ovsdb *, const struct json *, bool converting, struct ovsdb_txn **); |
ada496b5 BP |
72 | static struct ovsdb_error *ovsdb_file_create(struct ovsdb *, |
73 | struct ovsdb_log *, | |
74 | const char *file_name, | |
ada496b5 | 75 | unsigned int n_transactions, |
448b2003 | 76 | off_t snapshot_size, |
ada496b5 | 77 | struct ovsdb_file **filep); |
bd06962a | 78 | |
1e19e50e BP |
79 | /* Opens database 'file_name' and stores a pointer to the new database in |
80 | * '*dbp'. If 'read_only' is false, then the database will be locked and | |
81 | * changes to the database will be written to disk. If 'read_only' is true, | |
82 | * the database will not be locked and changes to the database will persist | |
83 | * only as long as the "struct ovsdb". | |
84 | * | |
ada496b5 BP |
85 | * If 'filep' is nonnull and 'read_only' is false, then on success sets |
86 | * '*filep' to an ovsdb_file that represents the open file. This ovsdb_file | |
87 | * persists until '*dbp' is destroyed. | |
88 | * | |
1e19e50e | 89 | * On success, returns NULL. On failure, returns an ovsdb_error (which the |
ada496b5 | 90 | * caller must destroy) and sets '*dbp' and '*filep' to NULL. */ |
struct ovsdb_error *
ovsdb_file_open(const char *file_name, bool read_only,
                struct ovsdb **dbp, struct ovsdb_file **filep)
{
    /* No alternate schema is supplied, so ovsdb_file_open__() reads the
     * schema out of the file itself. */
    const struct ovsdb_schema *alternate_schema = NULL;

    return ovsdb_file_open__(file_name, alternate_schema, read_only,
                             dbp, filep);
}
97 | ||
98 | /* Opens database 'file_name' with an alternate schema. The specified 'schema' | |
99 | * is used to interpret the data in 'file_name', ignoring the schema actually | |
100 | * stored in the file. Data in the file for tables or columns that do not | |
101 | * exist in 'schema' are ignored, but the ovsdb file format must otherwise be | |
102 | * observed, including column constraints. | |
103 | * | |
104 | * This function can be useful for upgrading or downgrading databases to | |
105 | * "almost-compatible" formats. | |
106 | * | |
107 | * The database will not be locked. Changes to the database will persist only | |
108 | * as long as the "struct ovsdb". | |
109 | * | |
110 | * On success, stores a pointer to the new database in '*dbp' and returns a | |
111 | * null pointer. On failure, returns an ovsdb_error (which the caller must | |
112 | * destroy) and sets '*dbp' to NULL. */ | |
struct ovsdb_error *
ovsdb_file_open_as_schema(const char *file_name,
                          const struct ovsdb_schema *schema,
                          struct ovsdb **dbp)
{
    /* Always opened read-only, and therefore without an ovsdb_file. */
    const bool read_only = true;
    struct ovsdb_file **no_filep = NULL;

    return ovsdb_file_open__(file_name, schema, read_only, dbp, no_filep);
}
120 | ||
121 | static struct ovsdb_error * | |
e1ebc8ce BP |
122 | ovsdb_file_open_log(const char *file_name, enum ovsdb_log_open_mode open_mode, |
123 | struct ovsdb_log **logp, struct ovsdb_schema **schemap) | |
bd06962a | 124 | { |
ada496b5 | 125 | struct ovsdb_schema *schema = NULL; |
ada496b5 | 126 | struct ovsdb_log *log = NULL; |
e1ebc8ce BP |
127 | struct ovsdb_error *error; |
128 | struct json *json = NULL; | |
ada496b5 | 129 | |
cb22974d | 130 | ovs_assert(logp || schemap); |
bd06962a | 131 | |
19b276cb | 132 | error = ovsdb_log_open(file_name, OVSDB_MAGIC, open_mode, -1, &log); |
bd06962a | 133 | if (error) { |
ada496b5 | 134 | goto error; |
bd06962a BP |
135 | } |
136 | ||
137 | error = ovsdb_log_read(log, &json); | |
138 | if (error) { | |
ada496b5 | 139 | goto error; |
bd06962a | 140 | } else if (!json) { |
ada496b5 BP |
141 | error = ovsdb_io_error(EOF, "%s: database file contains no schema", |
142 | file_name); | |
143 | goto error; | |
bd06962a BP |
144 | } |
145 | ||
e1ebc8ce | 146 | if (schemap) { |
1e19e50e BP |
147 | error = ovsdb_schema_from_json(json, &schema); |
148 | if (error) { | |
ada496b5 BP |
149 | error = ovsdb_wrap_error(error, |
150 | "failed to parse \"%s\" as ovsdb schema", | |
151 | file_name); | |
152 | goto error; | |
1e19e50e | 153 | } |
bd06962a BP |
154 | } |
155 | json_destroy(json); | |
156 | ||
e1ebc8ce BP |
157 | if (logp) { |
158 | *logp = log; | |
159 | } else { | |
160 | ovsdb_log_close(log); | |
161 | } | |
162 | if (schemap) { | |
163 | *schemap = schema; | |
164 | } | |
165 | return NULL; | |
166 | ||
167 | error: | |
168 | ovsdb_log_close(log); | |
169 | json_destroy(json); | |
170 | if (logp) { | |
171 | *logp = NULL; | |
172 | } | |
173 | if (schemap) { | |
174 | *schemap = NULL; | |
175 | } | |
176 | return error; | |
177 | } | |
178 | ||
179 | static struct ovsdb_error * | |
180 | ovsdb_file_open__(const char *file_name, | |
181 | const struct ovsdb_schema *alternate_schema, | |
182 | bool read_only, struct ovsdb **dbp, | |
183 | struct ovsdb_file **filep) | |
184 | { | |
185 | enum ovsdb_log_open_mode open_mode; | |
e1ebc8ce BP |
186 | struct ovsdb_schema *schema = NULL; |
187 | struct ovsdb_error *error; | |
188 | struct ovsdb_log *log; | |
189 | struct json *json; | |
190 | struct ovsdb *db = NULL; | |
191 | ||
192 | /* In read-only mode there is no ovsdb_file so 'filep' must be null. */ | |
cb22974d | 193 | ovs_assert(!(read_only && filep)); |
e1ebc8ce BP |
194 | |
195 | open_mode = read_only ? OVSDB_LOG_READ_ONLY : OVSDB_LOG_READ_WRITE; | |
196 | error = ovsdb_file_open_log(file_name, open_mode, &log, | |
197 | alternate_schema ? NULL : &schema); | |
198 | if (error) { | |
199 | goto error; | |
200 | } | |
201 | ||
202 | db = ovsdb_create(schema ? schema : ovsdb_schema_clone(alternate_schema)); | |
ada496b5 | 203 | |
448b2003 BP |
204 | /* When a log gets big, we compact it into a new log that initially has |
205 | * only a single transaction that represents the entire state of the | |
206 | * database. Thus, we consider the first transaction in the database to be | |
207 | * the snapshot. We measure its size to later influence the minimum log | |
208 | * size before compacting again. | |
209 | * | |
210 | * The schema precedes the snapshot in the log; we could compensate for its | |
211 | * size, but it's just not that important. */ | |
212 | off_t snapshot_size = 0; | |
213 | unsigned int n_transactions = 0; | |
bd06962a BP |
214 | while ((error = ovsdb_log_read(log, &json)) == NULL && json) { |
215 | struct ovsdb_txn *txn; | |
216 | ||
1e19e50e | 217 | error = ovsdb_file_txn_from_json(db, json, alternate_schema != NULL, |
2958f35b | 218 | &txn); |
bd06962a BP |
219 | json_destroy(json); |
220 | if (error) { | |
43675e26 | 221 | ovsdb_log_unread(log); |
bd06962a BP |
222 | break; |
223 | } | |
224 | ||
ada496b5 | 225 | n_transactions++; |
43675e26 BP |
226 | error = ovsdb_txn_commit(txn, false); |
227 | if (error) { | |
228 | ovsdb_log_unread(log); | |
229 | break; | |
230 | } | |
448b2003 BP |
231 | |
232 | if (n_transactions == 1) { | |
233 | snapshot_size = ovsdb_log_get_offset(log); | |
234 | } | |
bd06962a BP |
235 | } |
236 | if (error) { | |
ada496b5 BP |
237 | /* Log error but otherwise ignore it. Probably the database just got |
238 | * truncated due to power failure etc. and we should use its current | |
239 | * contents. */ | |
3865965d | 240 | char *msg = ovsdb_error_to_string_free(error); |
ff9f6644 | 241 | VLOG_ERR("%s", msg); |
bd06962a | 242 | free(msg); |
bd06962a BP |
243 | } |
244 | ||
245 | if (!read_only) { | |
ada496b5 BP |
246 | struct ovsdb_file *file; |
247 | ||
448b2003 BP |
248 | error = ovsdb_file_create(db, log, file_name, n_transactions, |
249 | snapshot_size, &file); | |
ada496b5 BP |
250 | if (error) { |
251 | goto error; | |
252 | } | |
253 | if (filep) { | |
254 | *filep = file; | |
255 | } | |
bd06962a BP |
256 | } else { |
257 | ovsdb_log_close(log); | |
258 | } | |
259 | ||
260 | *dbp = db; | |
261 | return NULL; | |
ada496b5 BP |
262 | |
263 | error: | |
264 | *dbp = NULL; | |
265 | if (filep) { | |
266 | *filep = NULL; | |
267 | } | |
268 | ovsdb_destroy(db); | |
ada496b5 BP |
269 | ovsdb_log_close(log); |
270 | return error; | |
bd06962a BP |
271 | } |
272 | ||
1e19e50e BP |
273 | static struct ovsdb_error * |
274 | ovsdb_file_update_row_from_json(struct ovsdb_row *row, bool converting, | |
275 | const struct json *json) | |
276 | { | |
277 | struct ovsdb_table_schema *schema = row->table->schema; | |
278 | struct ovsdb_error *error; | |
279 | struct shash_node *node; | |
280 | ||
281 | if (json->type != JSON_OBJECT) { | |
282 | return ovsdb_syntax_error(json, NULL, "row must be JSON object"); | |
283 | } | |
284 | ||
285 | SHASH_FOR_EACH (node, json_object(json)) { | |
286 | const char *column_name = node->name; | |
287 | const struct ovsdb_column *column; | |
288 | struct ovsdb_datum datum; | |
289 | ||
290 | column = ovsdb_table_schema_get_column(schema, column_name); | |
291 | if (!column) { | |
292 | if (converting) { | |
293 | continue; | |
294 | } | |
295 | return ovsdb_syntax_error(json, "unknown column", | |
296 | "No column %s in table %s.", | |
297 | column_name, schema->name); | |
298 | } | |
299 | ||
300 | error = ovsdb_datum_from_json(&datum, &column->type, node->data, NULL); | |
301 | if (error) { | |
302 | return error; | |
303 | } | |
304 | ovsdb_datum_swap(&row->fields[column->index], &datum); | |
305 | ovsdb_datum_destroy(&datum, &column->type); | |
306 | } | |
307 | ||
308 | return NULL; | |
309 | } | |
310 | ||
bd06962a BP |
311 | static struct ovsdb_error * |
312 | ovsdb_file_txn_row_from_json(struct ovsdb_txn *txn, struct ovsdb_table *table, | |
1e19e50e | 313 | bool converting, |
bd06962a BP |
314 | const struct uuid *row_uuid, struct json *json) |
315 | { | |
316 | const struct ovsdb_row *row = ovsdb_table_get_row(table, row_uuid); | |
317 | if (json->type == JSON_NULL) { | |
318 | if (!row) { | |
319 | return ovsdb_syntax_error(NULL, NULL, "transaction deletes " | |
320 | "row "UUID_FMT" that does not exist", | |
321 | UUID_ARGS(row_uuid)); | |
322 | } | |
323 | ovsdb_txn_row_delete(txn, row); | |
324 | return NULL; | |
325 | } else if (row) { | |
1e19e50e BP |
326 | return ovsdb_file_update_row_from_json(ovsdb_txn_row_modify(txn, row), |
327 | converting, json); | |
bd06962a BP |
328 | } else { |
329 | struct ovsdb_error *error; | |
330 | struct ovsdb_row *new; | |
331 | ||
332 | new = ovsdb_row_create(table); | |
333 | *ovsdb_row_get_uuid_rw(new) = *row_uuid; | |
1e19e50e | 334 | error = ovsdb_file_update_row_from_json(new, converting, json); |
bd06962a BP |
335 | if (error) { |
336 | ovsdb_row_destroy(new); | |
3697c062 BP |
337 | } else { |
338 | ovsdb_txn_row_insert(txn, new); | |
bd06962a | 339 | } |
bd06962a BP |
340 | return error; |
341 | } | |
342 | } | |
343 | ||
344 | static struct ovsdb_error * | |
345 | ovsdb_file_txn_table_from_json(struct ovsdb_txn *txn, | |
1e19e50e BP |
346 | struct ovsdb_table *table, |
347 | bool converting, struct json *json) | |
bd06962a BP |
348 | { |
349 | struct shash_node *node; | |
350 | ||
351 | if (json->type != JSON_OBJECT) { | |
352 | return ovsdb_syntax_error(json, NULL, "object expected"); | |
353 | } | |
354 | ||
355 | SHASH_FOR_EACH (node, json->u.object) { | |
356 | const char *uuid_string = node->name; | |
357 | struct json *txn_row_json = node->data; | |
358 | struct ovsdb_error *error; | |
359 | struct uuid row_uuid; | |
360 | ||
361 | if (!uuid_from_string(&row_uuid, uuid_string)) { | |
362 | return ovsdb_syntax_error(json, NULL, "\"%s\" is not a valid UUID", | |
363 | uuid_string); | |
364 | } | |
365 | ||
1e19e50e BP |
366 | error = ovsdb_file_txn_row_from_json(txn, table, converting, |
367 | &row_uuid, txn_row_json); | |
bd06962a BP |
368 | if (error) { |
369 | return error; | |
370 | } | |
371 | } | |
372 | ||
373 | return NULL; | |
374 | } | |
375 | ||
ada496b5 BP |
376 | /* Converts 'json' to an ovsdb_txn for 'db', storing the new transaction in |
377 | * '*txnp'. Returns NULL if successful, otherwise an error. | |
378 | * | |
379 | * If 'converting' is true, then unknown table and column names are ignored | |
380 | * (which can ease upgrading and downgrading schemas); otherwise, they are | |
2958f35b | 381 | * treated as errors. */ |
bd06962a BP |
382 | static struct ovsdb_error * |
383 | ovsdb_file_txn_from_json(struct ovsdb *db, const struct json *json, | |
2958f35b | 384 | bool converting, struct ovsdb_txn **txnp) |
bd06962a BP |
385 | { |
386 | struct ovsdb_error *error; | |
387 | struct shash_node *node; | |
388 | struct ovsdb_txn *txn; | |
389 | ||
390 | *txnp = NULL; | |
ada496b5 | 391 | |
bd06962a BP |
392 | if (json->type != JSON_OBJECT) { |
393 | return ovsdb_syntax_error(json, NULL, "object expected"); | |
394 | } | |
395 | ||
396 | txn = ovsdb_txn_create(db); | |
397 | SHASH_FOR_EACH (node, json->u.object) { | |
398 | const char *table_name = node->name; | |
ada496b5 | 399 | struct json *node_json = node->data; |
bd06962a BP |
400 | struct ovsdb_table *table; |
401 | ||
402 | table = shash_find_data(&db->tables, table_name); | |
403 | if (!table) { | |
d171b584 | 404 | if (!strcmp(table_name, "_date") |
ada496b5 | 405 | && node_json->type == JSON_INTEGER) { |
ada496b5 BP |
406 | continue; |
407 | } else if (!strcmp(table_name, "_comment") || converting) { | |
d171b584 BP |
408 | continue; |
409 | } | |
410 | ||
bd06962a BP |
411 | error = ovsdb_syntax_error(json, "unknown table", |
412 | "No table named %s.", table_name); | |
413 | goto error; | |
414 | } | |
415 | ||
1e19e50e | 416 | error = ovsdb_file_txn_table_from_json(txn, table, converting, |
ada496b5 | 417 | node_json); |
bd06962a BP |
418 | if (error) { |
419 | goto error; | |
420 | } | |
421 | } | |
422 | *txnp = txn; | |
423 | return NULL; | |
424 | ||
425 | error: | |
426 | ovsdb_txn_abort(txn); | |
427 | return error; | |
428 | } | |
1e19e50e | 429 | |
ada496b5 BP |
430 | static struct ovsdb_error * |
431 | ovsdb_file_save_copy__(const char *file_name, int locking, | |
432 | const char *comment, const struct ovsdb *db, | |
433 | struct ovsdb_log **logp) | |
1e19e50e BP |
434 | { |
435 | const struct shash_node *node; | |
436 | struct ovsdb_file_txn ftxn; | |
437 | struct ovsdb_error *error; | |
438 | struct ovsdb_log *log; | |
439 | struct json *json; | |
440 | ||
19b276cb | 441 | error = ovsdb_log_open(file_name, OVSDB_MAGIC, |
1e0b7e94 | 442 | OVSDB_LOG_CREATE_EXCL, locking, &log); |
1e19e50e BP |
443 | if (error) { |
444 | return error; | |
445 | } | |
446 | ||
447 | /* Write schema. */ | |
448 | json = ovsdb_schema_to_json(db->schema); | |
449 | error = ovsdb_log_write(log, json); | |
450 | json_destroy(json); | |
451 | if (error) { | |
452 | goto exit; | |
453 | } | |
454 | ||
455 | /* Write data. */ | |
456 | ovsdb_file_txn_init(&ftxn); | |
457 | SHASH_FOR_EACH (node, &db->tables) { | |
458 | const struct ovsdb_table *table = node->data; | |
459 | const struct ovsdb_row *row; | |
460 | ||
4e8e4213 | 461 | HMAP_FOR_EACH (row, hmap_node, &table->rows) { |
17d18afb | 462 | ovsdb_file_txn_add_row(&ftxn, NULL, row, NULL); |
1e19e50e BP |
463 | } |
464 | } | |
465 | error = ovsdb_file_txn_commit(ftxn.json, comment, true, log); | |
466 | ||
467 | exit: | |
ada496b5 BP |
468 | if (logp) { |
469 | if (!error) { | |
470 | *logp = log; | |
471 | log = NULL; | |
472 | } else { | |
473 | *logp = NULL; | |
474 | } | |
475 | } | |
1e19e50e BP |
476 | ovsdb_log_close(log); |
477 | if (error) { | |
478 | remove(file_name); | |
479 | } | |
480 | return error; | |
481 | } | |
ada496b5 BP |
482 | |
483 | /* Saves a snapshot of 'db''s current contents as 'file_name'. If 'comment' is | |
484 | * nonnull, then it is added along with the data contents and can be viewed | |
485 | * with "ovsdb-tool show-log". | |
486 | * | |
487 | * 'locking' is passed along to ovsdb_log_open() untouched. */ | |
struct ovsdb_error *
ovsdb_file_save_copy(const char *file_name, int locking,
                     const char *comment, const struct ovsdb *db)
{
    /* The caller does not want the log back, so pass a null 'logp' and let
     * ovsdb_file_save_copy__() close it. */
    struct ovsdb_log **no_logp = NULL;

    return ovsdb_file_save_copy__(file_name, locking, comment, db, no_logp);
}
e1ebc8ce BP |
494 | |
495 | /* Opens database 'file_name', reads its schema, and closes it. On success, | |
496 | * stores the schema into '*schemap' and returns NULL; the caller then owns the | |
497 | * schema. On failure, returns an ovsdb_error (which the caller must destroy) | |
498 | * and sets '*schemap' to NULL. */ | |
499 | struct ovsdb_error * | |
500 | ovsdb_file_read_schema(const char *file_name, struct ovsdb_schema **schemap) | |
501 | { | |
cb22974d | 502 | ovs_assert(schemap != NULL); |
e1ebc8ce BP |
503 | return ovsdb_file_open_log(file_name, OVSDB_LOG_READ_ONLY, NULL, schemap); |
504 | } | |
bd06962a BP |
505 | \f |
506 | /* Replica implementation. */ | |
507 | ||
afe20d5c | 508 | struct ovsdb_file { /* A database's on-disk log, attached to the database as a replica. */
bd06962a | 509 | struct ovsdb_replica replica; /* Embedded replica; ovsdb_file_cast() recovers the ovsdb_file from it. */
ada496b5 | 510 | struct ovsdb *db; /* Database whose contents are saved when compacting. */
bd06962a | 511 | struct ovsdb_log *log; /* Log that committed transactions are written to. */
ada496b5 | 512 | char *file_name; /* Absolute name of the file; freed by ovsdb_file_destroy(). */
2958f35b | 513 | long long int last_compact; /* time_msec() at creation or at the last successful compaction. */
ada496b5 BP |
514 | long long int next_compact; /* Earliest time_msec() at which a commit may trigger compaction. */
515 | unsigned int n_transactions; /* Number of transactions in the log. */ | |
448b2003 | 516 | off_t snapshot_size; /* Log offset just past the first transaction, as measured when the file was opened. */
bd06962a BP |
517 | }; |
518 | ||
afe20d5c | 519 | static const struct ovsdb_replica_class ovsdb_file_class; |
bd06962a | 520 | |
ada496b5 BP |
521 | static struct ovsdb_error * |
522 | ovsdb_file_create(struct ovsdb *db, struct ovsdb_log *log, | |
523 | const char *file_name, | |
448b2003 | 524 | unsigned int n_transactions, off_t snapshot_size, |
ada496b5 | 525 | struct ovsdb_file **filep) |
bd06962a | 526 | { |
ada496b5 | 527 | struct ovsdb_file *file; |
a35ae81c | 528 | char *deref_name; |
ada496b5 BP |
529 | char *abs_name; |
530 | ||
531 | /* Use the absolute name of the file because ovsdb-server opens its | |
532 | * database before daemonize() chdirs to "/". */ | |
a35ae81c BP |
533 | deref_name = follow_symlinks(file_name); |
534 | abs_name = abs_file_name(NULL, deref_name); | |
535 | free(deref_name); | |
ada496b5 BP |
536 | if (!abs_name) { |
537 | *filep = NULL; | |
538 | return ovsdb_io_error(0, "could not determine current " | |
539 | "working directory"); | |
540 | } | |
541 | ||
542 | file = xmalloc(sizeof *file); | |
afe20d5c | 543 | ovsdb_replica_init(&file->replica, &ovsdb_file_class); |
ada496b5 | 544 | file->db = db; |
afe20d5c | 545 | file->log = log; |
ada496b5 | 546 | file->file_name = abs_name; |
2958f35b PI |
547 | file->last_compact = time_msec(); |
548 | file->next_compact = file->last_compact + COMPACT_MIN_MSEC; | |
448b2003 | 549 | file->snapshot_size = snapshot_size; |
ada496b5 | 550 | file->n_transactions = n_transactions; |
afe20d5c | 551 | ovsdb_add_replica(db, &file->replica); |
ada496b5 BP |
552 | |
553 | *filep = file; | |
554 | return NULL; | |
bd06962a BP |
555 | } |
556 | ||
afe20d5c BP |
557 | static struct ovsdb_file * |
558 | ovsdb_file_cast(struct ovsdb_replica *replica) | |
bd06962a | 559 | { |
cb22974d | 560 | ovs_assert(replica->class == &ovsdb_file_class); |
afe20d5c | 561 | return CONTAINER_OF(replica, struct ovsdb_file, replica); |
bd06962a BP |
562 | } |
563 | ||
/* Callback for ovsdb_txn_for_each_change(): adds one row change to the
 * ovsdb_file_txn passed as 'ftxn_'.  Always returns true. */
static bool
ovsdb_file_change_cb(const struct ovsdb_row *old,
                     const struct ovsdb_row *new,
                     const unsigned long int *changed,
                     void *ftxn_)
{
    ovsdb_file_txn_add_row(ftxn_, old, new, changed);
    return true;
}
574 | ||
4d0a31b6 BP |
/* Adds annotations to the transaction object 'json', creating a new JSON
 * object if 'json' is null: a "_comment" member holding 'comment' (only if
 * 'comment' is nonnull) and a "_date" member holding the current wall-clock
 * time in milliseconds.  Returns the annotated object. */
struct json *
ovsdb_file_txn_annotate(struct json *json, const char *comment)
{
    struct json *annotated = json ? json : json_object_create();

    if (comment) {
        json_object_put_string(annotated, "_comment", comment);
    }
    json_object_put(annotated, "_date",
                    json_integer_create(time_wall_msec()));
    return annotated;
}
587 | ||
a3d573ed | 588 | static struct ovsdb_error * |
afe20d5c BP |
589 | ovsdb_file_commit(struct ovsdb_replica *replica, |
590 | const struct ovsdb_txn *txn, bool durable) | |
a3d573ed | 591 | { |
afe20d5c | 592 | struct ovsdb_file *file = ovsdb_file_cast(replica); |
a3d573ed | 593 | struct ovsdb_file_txn ftxn; |
ada496b5 | 594 | struct ovsdb_error *error; |
a3d573ed BP |
595 | |
596 | ovsdb_file_txn_init(&ftxn); | |
afe20d5c | 597 | ovsdb_txn_for_each_change(txn, ovsdb_file_change_cb, &ftxn); |
a3d573ed BP |
598 | if (!ftxn.json) { |
599 | /* Nothing to commit. */ | |
600 | return NULL; | |
601 | } | |
602 | ||
ada496b5 BP |
603 | error = ovsdb_file_txn_commit(ftxn.json, ovsdb_txn_get_comment(txn), |
604 | durable, file->log); | |
605 | if (error) { | |
606 | return error; | |
607 | } | |
608 | file->n_transactions++; | |
609 | ||
19616e46 BP |
610 | /* If it has been at least COMPACT_MIN_MSEC ms since the last time we |
611 | * compacted (or at least COMPACT_RETRY_MSEC ms since the last time we | |
ada496b5 | 612 | * tried), and if there are at least 100 transactions in the database, and |
448b2003 BP |
613 | * if the database is at least 10 MB, and the database is at least 4x the |
614 | * size of the previous snapshot, then compact the database. */ | |
615 | off_t log_size = ovsdb_log_get_offset(file->log); | |
ada496b5 BP |
616 | if (time_msec() >= file->next_compact |
617 | && file->n_transactions >= 100 | |
448b2003 BP |
618 | && log_size >= 10 * 1024 * 1024 |
619 | && log_size / 4 >= file->snapshot_size) | |
ada496b5 BP |
620 | { |
621 | error = ovsdb_file_compact(file); | |
622 | if (error) { | |
3865965d | 623 | char *s = ovsdb_error_to_string_free(error); |
ada496b5 | 624 | VLOG_WARN("%s: compacting database failed (%s), retrying in " |
8c7ea6a0 BP |
625 | "%d seconds", |
626 | file->file_name, s, COMPACT_RETRY_MSEC / 1000); | |
ada496b5 BP |
627 | free(s); |
628 | ||
629 | file->next_compact = time_msec() + COMPACT_RETRY_MSEC; | |
630 | } | |
631 | } | |
632 | ||
633 | return NULL; | |
634 | } | |
635 | ||
84a13f61 AS |
636 | /* Rename 'old' to 'new', replacing 'new' if it exists. Returns NULL if |
637 | * successful, otherwise an ovsdb_error that the caller must destroy. */ | |
638 | static struct ovsdb_error * OVS_WARN_UNUSED_RESULT | |
639 | ovsdb_rename(const char *old, const char *new) | |
640 | { | |
641 | #ifdef _WIN32 | |
642 | int error = (MoveFileEx(old, new, MOVEFILE_REPLACE_EXISTING | |
643 | | MOVEFILE_WRITE_THROUGH | MOVEFILE_COPY_ALLOWED) | |
644 | ? 0 : EACCES); | |
645 | #else | |
646 | int error = rename(old, new) ? errno : 0; | |
647 | #endif | |
648 | ||
649 | return (error | |
650 | ? ovsdb_io_error(error, "failed to rename \"%s\" to \"%s\"", | |
651 | old, new) | |
652 | : NULL); | |
653 | } | |
654 | ||
ada496b5 BP |
655 | struct ovsdb_error * |
656 | ovsdb_file_compact(struct ovsdb_file *file) | |
657 | { | |
658 | struct ovsdb_log *new_log = NULL; | |
659 | struct lockfile *tmp_lock = NULL; | |
660 | struct ovsdb_error *error; | |
661 | char *tmp_name = NULL; | |
662 | char *comment = NULL; | |
663 | int retval; | |
664 | ||
665 | comment = xasprintf("compacting database online " | |
666 | "(%.3f seconds old, %u transactions, %llu bytes)", | |
2958f35b | 667 | (time_wall_msec() - file->last_compact) / 1000.0, |
ada496b5 BP |
668 | file->n_transactions, |
669 | (unsigned long long) ovsdb_log_get_offset(file->log)); | |
670 | VLOG_INFO("%s: %s", file->file_name, comment); | |
671 | ||
672 | /* Commit the old version, so that we can be assured that we'll eventually | |
673 | * have either the old or the new version. */ | |
674 | error = ovsdb_log_commit(file->log); | |
675 | if (error) { | |
676 | goto exit; | |
677 | } | |
678 | ||
679 | /* Lock temporary file. */ | |
680 | tmp_name = xasprintf("%s.tmp", file->file_name); | |
4770e795 | 681 | retval = lockfile_lock(tmp_name, &tmp_lock); |
ada496b5 BP |
682 | if (retval) { |
683 | error = ovsdb_io_error(retval, "could not get lock on %s", tmp_name); | |
684 | goto exit; | |
685 | } | |
686 | ||
687 | /* Remove temporary file. (It might not exist.) */ | |
688 | if (unlink(tmp_name) < 0 && errno != ENOENT) { | |
689 | error = ovsdb_io_error(errno, "failed to remove %s", tmp_name); | |
690 | goto exit; | |
691 | } | |
692 | ||
693 | /* Save a copy. */ | |
694 | error = ovsdb_file_save_copy__(tmp_name, false, comment, file->db, | |
695 | &new_log); | |
696 | if (error) { | |
697 | goto exit; | |
698 | } | |
699 | ||
84a13f61 AS |
700 | /* Replace original file by the temporary file. |
701 | * | |
702 | * We support two strategies: | |
703 | * | |
704 | * - The preferred strategy is to rename the temporary file over the | |
705 | * original one in-place, then close the original one. This works on | |
706 | * Unix-like systems. It does not work on Windows, which does not | |
707 | * allow open files to be renamed. The approach has the advantage | |
708 | * that, at any point, we can drop back to something that already | |
709 | * works. | |
710 | * | |
711 | * - Alternatively, we can close both files, rename, then open the new | |
712 | * file (which now has the original name). This works on all | |
713 | * systems, but if reopening the file fails then we're stuck and have | |
714 | * to abort (XXX although it would be better to retry). | |
715 | * | |
716 | * We make the strategy a variable instead of an #ifdef to make it easier | |
717 | * to test both strategies on Unix-like systems, and to make the code | |
718 | * easier to read. */ | |
719 | #ifdef _WIN32 | |
720 | bool rename_open_files = false; | |
721 | #else | |
722 | bool rename_open_files = true; | |
723 | #endif | |
724 | if (!rename_open_files) { | |
725 | ovsdb_log_close(file->log); | |
726 | ovsdb_log_close(new_log); | |
727 | file->log = NULL; | |
728 | new_log = NULL; | |
729 | } | |
730 | error = ovsdb_rename(tmp_name, file->file_name); | |
731 | if (error) { | |
ada496b5 BP |
732 | goto exit; |
733 | } | |
84a13f61 AS |
734 | if (rename_open_files) { |
735 | fsync_parent_dir(file->file_name); | |
ada496b5 BP |
736 | ovsdb_log_close(file->log); |
737 | file->log = new_log; | |
ada496b5 | 738 | } else { |
84a13f61 AS |
739 | /* Re-open the log. This skips past the schema log record. */ |
740 | error = ovsdb_file_open_log(file->file_name, OVSDB_LOG_READ_WRITE, | |
741 | &file->log, NULL); | |
742 | if (error) { | |
743 | ovs_fatal(0, "could not reopen database"); | |
744 | } | |
745 | ||
746 | /* Skip past the data log reecord. */ | |
747 | struct json *json; | |
748 | error = ovsdb_log_read(file->log, &json); | |
749 | if (error) { | |
750 | ovs_fatal(0, "error reading database"); | |
751 | } | |
752 | json_destroy(json); | |
753 | } | |
754 | ||
755 | /* Success! */ | |
756 | file->last_compact = time_msec(); | |
757 | file->next_compact = file->last_compact + COMPACT_MIN_MSEC; | |
758 | file->n_transactions = 1; | |
759 | ||
760 | exit: | |
761 | if (error) { | |
ada496b5 BP |
762 | ovsdb_log_close(new_log); |
763 | if (tmp_lock) { | |
764 | unlink(tmp_name); | |
765 | } | |
766 | } | |
767 | ||
768 | lockfile_unlock(tmp_lock); | |
769 | free(tmp_name); | |
770 | free(comment); | |
771 | ||
772 | return error; | |
a3d573ed BP |
773 | } |
774 | ||
775 | static void | |
afe20d5c | 776 | ovsdb_file_destroy(struct ovsdb_replica *replica) |
a3d573ed | 777 | { |
afe20d5c | 778 | struct ovsdb_file *file = ovsdb_file_cast(replica); |
a3d573ed | 779 | |
afe20d5c | 780 | ovsdb_log_close(file->log); |
ada496b5 | 781 | free(file->file_name); |
afe20d5c | 782 | free(file); |
a3d573ed BP |
783 | } |
784 | ||
afe20d5c BP |
785 | static const struct ovsdb_replica_class ovsdb_file_class = { /* Replica class for file-backed databases; installed by ovsdb_file_create(). */ |
786 | ovsdb_file_commit, /* Writes a transaction's changes to the log. */ | |
787 | ovsdb_file_destroy /* Closes the log and frees the ovsdb_file. */ | |
a3d573ed BP |
788 | }; |
789 | \f | |
790 | static void | |
791 | ovsdb_file_txn_init(struct ovsdb_file_txn *ftxn) | |
792 | { | |
793 | ftxn->json = NULL; | |
794 | ftxn->table_json = NULL; | |
795 | ftxn->table = NULL; | |
796 | } | |
797 | ||
798 | static void | |
799 | ovsdb_file_txn_add_row(struct ovsdb_file_txn *ftxn, | |
800 | const struct ovsdb_row *old, | |
17d18afb BP |
801 | const struct ovsdb_row *new, |
802 | const unsigned long int *changed) | |
bd06962a | 803 | { |
bd06962a BP |
804 | struct json *row; |
805 | ||
806 | if (!new) { | |
807 | row = json_null_create(); | |
808 | } else { | |
809 | struct shash_node *node; | |
810 | ||
88942565 | 811 | row = old ? NULL : json_object_create(); |
bd06962a BP |
812 | SHASH_FOR_EACH (node, &new->table->schema->columns) { |
813 | const struct ovsdb_column *column = node->data; | |
814 | const struct ovsdb_type *type = &column->type; | |
815 | unsigned int idx = column->index; | |
816 | ||
817 | if (idx != OVSDB_COL_UUID && column->persistent | |
c532bf9d | 818 | && (old |
17d18afb | 819 | ? bitmap_is_set(changed, idx) |
c532bf9d | 820 | : !ovsdb_datum_is_default(&new->fields[idx], type))) |
bd06962a BP |
821 | { |
822 | if (!row) { | |
823 | row = json_object_create(); | |
824 | } | |
825 | json_object_put(row, column->name, | |
826 | ovsdb_datum_to_json(&new->fields[idx], type)); | |
827 | } | |
828 | } | |
829 | } | |
830 | ||
831 | if (row) { | |
832 | struct ovsdb_table *table = new ? new->table : old->table; | |
833 | char uuid[UUID_LEN + 1]; | |
834 | ||
a3d573ed | 835 | if (table != ftxn->table) { |
bd06962a | 836 | /* Create JSON object for transaction overall. */ |
a3d573ed BP |
837 | if (!ftxn->json) { |
838 | ftxn->json = json_object_create(); | |
bd06962a BP |
839 | } |
840 | ||
841 | /* Create JSON object for transaction on this table. */ | |
a3d573ed BP |
842 | ftxn->table_json = json_object_create(); |
843 | ftxn->table = table; | |
844 | json_object_put(ftxn->json, table->schema->name, ftxn->table_json); | |
bd06962a BP |
845 | } |
846 | ||
847 | /* Add row to transaction for this table. */ | |
848 | snprintf(uuid, sizeof uuid, | |
849 | UUID_FMT, UUID_ARGS(ovsdb_row_get_uuid(new ? new : old))); | |
a3d573ed | 850 | json_object_put(ftxn->table_json, uuid, row); |
bd06962a | 851 | } |
bd06962a BP |
852 | } |
853 | ||
/* Annotates 'json' with 'comment' and the current date, writes it to 'log',
 * and destroys it.  If 'durable', also commits 'log'.
 *
 * Returns NULL on success, otherwise an error wrapped with a description of
 * the step that failed. */
static struct ovsdb_error *
ovsdb_file_txn_commit(struct json *json, const char *comment,
                      bool durable, struct ovsdb_log *log)
{
    struct json *annotated = ovsdb_file_txn_annotate(json, comment);
    struct ovsdb_error *error = ovsdb_log_write(log, annotated);

    json_destroy(annotated);
    if (error) {
        return ovsdb_wrap_error(error, "writing transaction failed");
    }

    if (!durable) {
        return NULL;
    }

    error = ovsdb_log_commit(log);
    if (error) {
        return ovsdb_wrap_error(error, "committing transaction failed");
    }
    return NULL;
}