]> git.proxmox.com Git - ovs.git/blame - ovsdb/file.c
log: Add new open mode OVSDB_LOG_CREATE_EXCL.
[ovs.git] / ovsdb / file.c
CommitLineData
448b2003 1/* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2016 Nicira, Inc.
bd06962a
BP
2 *
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16#include <config.h>
17
18#include "file.h"
19
ada496b5 20#include <errno.h>
bd06962a 21#include <fcntl.h>
ada496b5 22#include <unistd.h>
bd06962a 23
17d18afb 24#include "bitmap.h"
bd06962a
BP
25#include "column.h"
26#include "log.h"
ee89ea7b 27#include "openvswitch/json.h"
ada496b5 28#include "lockfile.h"
bd06962a
BP
29#include "ovsdb.h"
30#include "ovsdb-error.h"
31#include "row.h"
ada496b5 32#include "socket-util.h"
bd06962a 33#include "table.h"
d171b584 34#include "timeval.h"
bd06962a
BP
35#include "transaction.h"
36#include "uuid.h"
37#include "util.h"
e6211adc 38#include "openvswitch/vlog.h"
bd06962a 39
VLOG_DEFINE_THIS_MODULE(ovsdb_file);

/* Shortest interval, in milliseconds, that is scheduled between one
 * compaction (or the opening of the file) and the next compaction attempt:
 * 10 minutes. */
#define COMPACT_MIN_MSEC (10 * 60 * 1000)

/* Shortest interval, in milliseconds, before compaction is attempted again
 * after an attempt that failed: 1 minute. */
#define COMPACT_RETRY_MSEC (60 * 1000)
48
a3d573ed
BP
/* Accumulator used while a transaction is being serialized to JSON for the
 * log file.
 *
 * 'json' maps table names to per-table objects.  'table_json' remembers the
 * per-table object that was added most recently, so that consecutive rows
 * from the same table are appended to it directly. */
struct ovsdb_file_txn {
    struct json *json;          /* Whole transaction; NULL while empty. */
    struct json *table_json;    /* Object inside 'json' for 'table'. */
    struct ovsdb_table *table;  /* Table that 'table_json' describes. */
};
55
/* Building a transaction record and appending it to a log. */
static void ovsdb_file_txn_init(struct ovsdb_file_txn *ftxn);
static void ovsdb_file_txn_add_row(struct ovsdb_file_txn *ftxn,
                                   const struct ovsdb_row *old,
                                   const struct ovsdb_row *new,
                                   const unsigned long int *changed);
static struct ovsdb_error *ovsdb_file_txn_commit(struct json *json,
                                                 const char *comment,
                                                 bool durable,
                                                 struct ovsdb_log *log);

/* Opening a database file and replaying the transactions in its log. */
static struct ovsdb_error *ovsdb_file_open__(
    const char *file_name, const struct ovsdb_schema *alternate_schema,
    bool read_only, struct ovsdb **dbp, struct ovsdb_file **filep);
static struct ovsdb_error *ovsdb_file_txn_from_json(
    struct ovsdb *db, const struct json *json, bool converting,
    struct ovsdb_txn **txnp);

/* Attaching an open log to a database as a replica. */
static struct ovsdb_error *ovsdb_file_create(
    struct ovsdb *db, struct ovsdb_log *log, const char *file_name,
    unsigned int n_transactions, off_t snapshot_size,
    struct ovsdb_file **filep);
bd06962a 78
1e19e50e
BP
/* Opens the database stored in 'file_name', interpreting it with the schema
 * recorded in the file itself, and stores the new database in '*dbp'.
 *
 * With 'read_only' false, the database is locked and changes are written
 * back to disk.  With 'read_only' true, no lock is taken and changes last
 * only as long as the "struct ovsdb" does; 'filep' must then be null.
 *
 * When 'filep' is nonnull (and 'read_only' is false), success also stores in
 * '*filep' an ovsdb_file for the open file, which stays valid until '*dbp'
 * is destroyed.
 *
 * Returns NULL on success.  On failure, returns an ovsdb_error that the
 * caller must destroy, and sets '*dbp' and (if 'filep' is nonnull) '*filep'
 * to NULL. */
struct ovsdb_error *
ovsdb_file_open(const char *file_name, bool read_only,
                struct ovsdb **dbp, struct ovsdb_file **filep)
{
    /* No alternate schema: use the one stored in the file. */
    const struct ovsdb_schema *no_alternate_schema = NULL;

    return ovsdb_file_open__(file_name, no_alternate_schema, read_only,
                             dbp, filep);
}
97
/* Opens the database stored in 'file_name', but interprets its contents with
 * 'schema' instead of the schema recorded in the file.  Tables and columns
 * in the file that 'schema' does not define are skipped; everything else
 * must still follow the ovsdb file format, column constraints included.
 *
 * This is meant for upgrading or downgrading a database to an
 * "almost-compatible" format.
 *
 * The database is opened read-only: it is not locked, and changes made to it
 * last only as long as the "struct ovsdb" does.
 *
 * Returns NULL on success, with the new database stored in '*dbp'.  On
 * failure, returns an ovsdb_error that the caller must destroy, and sets
 * '*dbp' to NULL. */
struct ovsdb_error *
ovsdb_file_open_as_schema(const char *file_name,
                          const struct ovsdb_schema *schema,
                          struct ovsdb **dbp)
{
    const bool read_only = true;

    return ovsdb_file_open__(file_name, schema, read_only, dbp, NULL);
}
120
121static struct ovsdb_error *
e1ebc8ce
BP
122ovsdb_file_open_log(const char *file_name, enum ovsdb_log_open_mode open_mode,
123 struct ovsdb_log **logp, struct ovsdb_schema **schemap)
bd06962a 124{
ada496b5 125 struct ovsdb_schema *schema = NULL;
ada496b5 126 struct ovsdb_log *log = NULL;
e1ebc8ce
BP
127 struct ovsdb_error *error;
128 struct json *json = NULL;
ada496b5 129
cb22974d 130 ovs_assert(logp || schemap);
bd06962a 131
19b276cb 132 error = ovsdb_log_open(file_name, OVSDB_MAGIC, open_mode, -1, &log);
bd06962a 133 if (error) {
ada496b5 134 goto error;
bd06962a
BP
135 }
136
137 error = ovsdb_log_read(log, &json);
138 if (error) {
ada496b5 139 goto error;
bd06962a 140 } else if (!json) {
ada496b5
BP
141 error = ovsdb_io_error(EOF, "%s: database file contains no schema",
142 file_name);
143 goto error;
bd06962a
BP
144 }
145
e1ebc8ce 146 if (schemap) {
1e19e50e
BP
147 error = ovsdb_schema_from_json(json, &schema);
148 if (error) {
ada496b5
BP
149 error = ovsdb_wrap_error(error,
150 "failed to parse \"%s\" as ovsdb schema",
151 file_name);
152 goto error;
1e19e50e 153 }
bd06962a
BP
154 }
155 json_destroy(json);
156
e1ebc8ce
BP
157 if (logp) {
158 *logp = log;
159 } else {
160 ovsdb_log_close(log);
161 }
162 if (schemap) {
163 *schemap = schema;
164 }
165 return NULL;
166
167error:
168 ovsdb_log_close(log);
169 json_destroy(json);
170 if (logp) {
171 *logp = NULL;
172 }
173 if (schemap) {
174 *schemap = NULL;
175 }
176 return error;
177}
178
179static struct ovsdb_error *
180ovsdb_file_open__(const char *file_name,
181 const struct ovsdb_schema *alternate_schema,
182 bool read_only, struct ovsdb **dbp,
183 struct ovsdb_file **filep)
184{
185 enum ovsdb_log_open_mode open_mode;
e1ebc8ce
BP
186 struct ovsdb_schema *schema = NULL;
187 struct ovsdb_error *error;
188 struct ovsdb_log *log;
189 struct json *json;
190 struct ovsdb *db = NULL;
191
192 /* In read-only mode there is no ovsdb_file so 'filep' must be null. */
cb22974d 193 ovs_assert(!(read_only && filep));
e1ebc8ce
BP
194
195 open_mode = read_only ? OVSDB_LOG_READ_ONLY : OVSDB_LOG_READ_WRITE;
196 error = ovsdb_file_open_log(file_name, open_mode, &log,
197 alternate_schema ? NULL : &schema);
198 if (error) {
199 goto error;
200 }
201
202 db = ovsdb_create(schema ? schema : ovsdb_schema_clone(alternate_schema));
ada496b5 203
448b2003
BP
204 /* When a log gets big, we compact it into a new log that initially has
205 * only a single transaction that represents the entire state of the
206 * database. Thus, we consider the first transaction in the database to be
207 * the snapshot. We measure its size to later influence the minimum log
208 * size before compacting again.
209 *
210 * The schema precedes the snapshot in the log; we could compensate for its
211 * size, but it's just not that important. */
212 off_t snapshot_size = 0;
213 unsigned int n_transactions = 0;
bd06962a
BP
214 while ((error = ovsdb_log_read(log, &json)) == NULL && json) {
215 struct ovsdb_txn *txn;
216
1e19e50e 217 error = ovsdb_file_txn_from_json(db, json, alternate_schema != NULL,
2958f35b 218 &txn);
bd06962a
BP
219 json_destroy(json);
220 if (error) {
43675e26 221 ovsdb_log_unread(log);
bd06962a
BP
222 break;
223 }
224
ada496b5 225 n_transactions++;
43675e26
BP
226 error = ovsdb_txn_commit(txn, false);
227 if (error) {
228 ovsdb_log_unread(log);
229 break;
230 }
448b2003
BP
231
232 if (n_transactions == 1) {
233 snapshot_size = ovsdb_log_get_offset(log);
234 }
bd06962a
BP
235 }
236 if (error) {
ada496b5
BP
237 /* Log error but otherwise ignore it. Probably the database just got
238 * truncated due to power failure etc. and we should use its current
239 * contents. */
3865965d 240 char *msg = ovsdb_error_to_string_free(error);
ff9f6644 241 VLOG_ERR("%s", msg);
bd06962a 242 free(msg);
bd06962a
BP
243 }
244
245 if (!read_only) {
ada496b5
BP
246 struct ovsdb_file *file;
247
448b2003
BP
248 error = ovsdb_file_create(db, log, file_name, n_transactions,
249 snapshot_size, &file);
ada496b5
BP
250 if (error) {
251 goto error;
252 }
253 if (filep) {
254 *filep = file;
255 }
bd06962a
BP
256 } else {
257 ovsdb_log_close(log);
258 }
259
260 *dbp = db;
261 return NULL;
ada496b5
BP
262
263error:
264 *dbp = NULL;
265 if (filep) {
266 *filep = NULL;
267 }
268 ovsdb_destroy(db);
ada496b5
BP
269 ovsdb_log_close(log);
270 return error;
bd06962a
BP
271}
272
1e19e50e
BP
273static struct ovsdb_error *
274ovsdb_file_update_row_from_json(struct ovsdb_row *row, bool converting,
275 const struct json *json)
276{
277 struct ovsdb_table_schema *schema = row->table->schema;
278 struct ovsdb_error *error;
279 struct shash_node *node;
280
281 if (json->type != JSON_OBJECT) {
282 return ovsdb_syntax_error(json, NULL, "row must be JSON object");
283 }
284
285 SHASH_FOR_EACH (node, json_object(json)) {
286 const char *column_name = node->name;
287 const struct ovsdb_column *column;
288 struct ovsdb_datum datum;
289
290 column = ovsdb_table_schema_get_column(schema, column_name);
291 if (!column) {
292 if (converting) {
293 continue;
294 }
295 return ovsdb_syntax_error(json, "unknown column",
296 "No column %s in table %s.",
297 column_name, schema->name);
298 }
299
300 error = ovsdb_datum_from_json(&datum, &column->type, node->data, NULL);
301 if (error) {
302 return error;
303 }
304 ovsdb_datum_swap(&row->fields[column->index], &datum);
305 ovsdb_datum_destroy(&datum, &column->type);
306 }
307
308 return NULL;
309}
310
bd06962a
BP
311static struct ovsdb_error *
312ovsdb_file_txn_row_from_json(struct ovsdb_txn *txn, struct ovsdb_table *table,
1e19e50e 313 bool converting,
bd06962a
BP
314 const struct uuid *row_uuid, struct json *json)
315{
316 const struct ovsdb_row *row = ovsdb_table_get_row(table, row_uuid);
317 if (json->type == JSON_NULL) {
318 if (!row) {
319 return ovsdb_syntax_error(NULL, NULL, "transaction deletes "
320 "row "UUID_FMT" that does not exist",
321 UUID_ARGS(row_uuid));
322 }
323 ovsdb_txn_row_delete(txn, row);
324 return NULL;
325 } else if (row) {
1e19e50e
BP
326 return ovsdb_file_update_row_from_json(ovsdb_txn_row_modify(txn, row),
327 converting, json);
bd06962a
BP
328 } else {
329 struct ovsdb_error *error;
330 struct ovsdb_row *new;
331
332 new = ovsdb_row_create(table);
333 *ovsdb_row_get_uuid_rw(new) = *row_uuid;
1e19e50e 334 error = ovsdb_file_update_row_from_json(new, converting, json);
bd06962a
BP
335 if (error) {
336 ovsdb_row_destroy(new);
3697c062
BP
337 } else {
338 ovsdb_txn_row_insert(txn, new);
bd06962a 339 }
bd06962a
BP
340 return error;
341 }
342}
343
344static struct ovsdb_error *
345ovsdb_file_txn_table_from_json(struct ovsdb_txn *txn,
1e19e50e
BP
346 struct ovsdb_table *table,
347 bool converting, struct json *json)
bd06962a
BP
348{
349 struct shash_node *node;
350
351 if (json->type != JSON_OBJECT) {
352 return ovsdb_syntax_error(json, NULL, "object expected");
353 }
354
355 SHASH_FOR_EACH (node, json->u.object) {
356 const char *uuid_string = node->name;
357 struct json *txn_row_json = node->data;
358 struct ovsdb_error *error;
359 struct uuid row_uuid;
360
361 if (!uuid_from_string(&row_uuid, uuid_string)) {
362 return ovsdb_syntax_error(json, NULL, "\"%s\" is not a valid UUID",
363 uuid_string);
364 }
365
1e19e50e
BP
366 error = ovsdb_file_txn_row_from_json(txn, table, converting,
367 &row_uuid, txn_row_json);
bd06962a
BP
368 if (error) {
369 return error;
370 }
371 }
372
373 return NULL;
374}
375
ada496b5
BP
376/* Converts 'json' to an ovsdb_txn for 'db', storing the new transaction in
377 * '*txnp'. Returns NULL if successful, otherwise an error.
378 *
379 * If 'converting' is true, then unknown table and column names are ignored
380 * (which can ease upgrading and downgrading schemas); otherwise, they are
2958f35b 381 * treated as errors. */
bd06962a
BP
382static struct ovsdb_error *
383ovsdb_file_txn_from_json(struct ovsdb *db, const struct json *json,
2958f35b 384 bool converting, struct ovsdb_txn **txnp)
bd06962a
BP
385{
386 struct ovsdb_error *error;
387 struct shash_node *node;
388 struct ovsdb_txn *txn;
389
390 *txnp = NULL;
ada496b5 391
bd06962a
BP
392 if (json->type != JSON_OBJECT) {
393 return ovsdb_syntax_error(json, NULL, "object expected");
394 }
395
396 txn = ovsdb_txn_create(db);
397 SHASH_FOR_EACH (node, json->u.object) {
398 const char *table_name = node->name;
ada496b5 399 struct json *node_json = node->data;
bd06962a
BP
400 struct ovsdb_table *table;
401
402 table = shash_find_data(&db->tables, table_name);
403 if (!table) {
d171b584 404 if (!strcmp(table_name, "_date")
ada496b5 405 && node_json->type == JSON_INTEGER) {
ada496b5
BP
406 continue;
407 } else if (!strcmp(table_name, "_comment") || converting) {
d171b584
BP
408 continue;
409 }
410
bd06962a
BP
411 error = ovsdb_syntax_error(json, "unknown table",
412 "No table named %s.", table_name);
413 goto error;
414 }
415
1e19e50e 416 error = ovsdb_file_txn_table_from_json(txn, table, converting,
ada496b5 417 node_json);
bd06962a
BP
418 if (error) {
419 goto error;
420 }
421 }
422 *txnp = txn;
423 return NULL;
424
425error:
426 ovsdb_txn_abort(txn);
427 return error;
428}
1e19e50e 429
ada496b5
BP
430static struct ovsdb_error *
431ovsdb_file_save_copy__(const char *file_name, int locking,
432 const char *comment, const struct ovsdb *db,
433 struct ovsdb_log **logp)
1e19e50e
BP
434{
435 const struct shash_node *node;
436 struct ovsdb_file_txn ftxn;
437 struct ovsdb_error *error;
438 struct ovsdb_log *log;
439 struct json *json;
440
19b276cb 441 error = ovsdb_log_open(file_name, OVSDB_MAGIC,
1e0b7e94 442 OVSDB_LOG_CREATE_EXCL, locking, &log);
1e19e50e
BP
443 if (error) {
444 return error;
445 }
446
447 /* Write schema. */
448 json = ovsdb_schema_to_json(db->schema);
449 error = ovsdb_log_write(log, json);
450 json_destroy(json);
451 if (error) {
452 goto exit;
453 }
454
455 /* Write data. */
456 ovsdb_file_txn_init(&ftxn);
457 SHASH_FOR_EACH (node, &db->tables) {
458 const struct ovsdb_table *table = node->data;
459 const struct ovsdb_row *row;
460
4e8e4213 461 HMAP_FOR_EACH (row, hmap_node, &table->rows) {
17d18afb 462 ovsdb_file_txn_add_row(&ftxn, NULL, row, NULL);
1e19e50e
BP
463 }
464 }
465 error = ovsdb_file_txn_commit(ftxn.json, comment, true, log);
466
467exit:
ada496b5
BP
468 if (logp) {
469 if (!error) {
470 *logp = log;
471 log = NULL;
472 } else {
473 *logp = NULL;
474 }
475 }
1e19e50e
BP
476 ovsdb_log_close(log);
477 if (error) {
478 remove(file_name);
479 }
480 return error;
481}
ada496b5
BP
482
/* Writes a snapshot of the current contents of 'db' to the new file
 * 'file_name'.  A nonnull 'comment' is stored alongside the data, where
 * "ovsdb-tool show-log" can display it.
 *
 * 'locking' is handed to ovsdb_log_open() as is.
 *
 * Returns NULL on success, otherwise an ovsdb_error that the caller must
 * destroy. */
struct ovsdb_error *
ovsdb_file_save_copy(const char *file_name, int locking,
                     const char *comment, const struct ovsdb *db)
{
    /* The caller has no use for the open log, so let the helper close it. */
    struct ovsdb_log **no_log = NULL;

    return ovsdb_file_save_copy__(file_name, locking, comment, db, no_log);
}
e1ebc8ce
BP
494
495/* Opens database 'file_name', reads its schema, and closes it. On success,
496 * stores the schema into '*schemap' and returns NULL; the caller then owns the
497 * schema. On failure, returns an ovsdb_error (which the caller must destroy)
498 * and sets '*dbp' to NULL. */
499struct ovsdb_error *
500ovsdb_file_read_schema(const char *file_name, struct ovsdb_schema **schemap)
501{
cb22974d 502 ovs_assert(schemap != NULL);
e1ebc8ce
BP
503 return ovsdb_file_open_log(file_name, OVSDB_LOG_READ_ONLY, NULL, schemap);
504}
bd06962a
BP
505\f
506/* Replica implementation. */
507
afe20d5c 508struct ovsdb_file {
bd06962a 509 struct ovsdb_replica replica;
ada496b5 510 struct ovsdb *db;
bd06962a 511 struct ovsdb_log *log;
ada496b5 512 char *file_name;
2958f35b 513 long long int last_compact;
ada496b5
BP
514 long long int next_compact;
515 unsigned int n_transactions;
448b2003 516 off_t snapshot_size;
bd06962a
BP
517};
518
afe20d5c 519static const struct ovsdb_replica_class ovsdb_file_class;
bd06962a 520
ada496b5
BP
521static struct ovsdb_error *
522ovsdb_file_create(struct ovsdb *db, struct ovsdb_log *log,
523 const char *file_name,
448b2003 524 unsigned int n_transactions, off_t snapshot_size,
ada496b5 525 struct ovsdb_file **filep)
bd06962a 526{
ada496b5 527 struct ovsdb_file *file;
a35ae81c 528 char *deref_name;
ada496b5
BP
529 char *abs_name;
530
531 /* Use the absolute name of the file because ovsdb-server opens its
532 * database before daemonize() chdirs to "/". */
a35ae81c
BP
533 deref_name = follow_symlinks(file_name);
534 abs_name = abs_file_name(NULL, deref_name);
535 free(deref_name);
ada496b5
BP
536 if (!abs_name) {
537 *filep = NULL;
538 return ovsdb_io_error(0, "could not determine current "
539 "working directory");
540 }
541
542 file = xmalloc(sizeof *file);
afe20d5c 543 ovsdb_replica_init(&file->replica, &ovsdb_file_class);
ada496b5 544 file->db = db;
afe20d5c 545 file->log = log;
ada496b5 546 file->file_name = abs_name;
2958f35b
PI
547 file->last_compact = time_msec();
548 file->next_compact = file->last_compact + COMPACT_MIN_MSEC;
448b2003 549 file->snapshot_size = snapshot_size;
ada496b5 550 file->n_transactions = n_transactions;
afe20d5c 551 ovsdb_add_replica(db, &file->replica);
ada496b5
BP
552
553 *filep = file;
554 return NULL;
bd06962a
BP
555}
556
afe20d5c
BP
557static struct ovsdb_file *
558ovsdb_file_cast(struct ovsdb_replica *replica)
bd06962a 559{
cb22974d 560 ovs_assert(replica->class == &ovsdb_file_class);
afe20d5c 561 return CONTAINER_OF(replica, struct ovsdb_file, replica);
bd06962a
BP
562}
563
/* Callback for ovsdb_txn_for_each_change(): records one row change in the
 * ovsdb_file_txn passed as 'ftxn_'.  Always returns true. */
static bool
ovsdb_file_change_cb(const struct ovsdb_row *old,
                     const struct ovsdb_row *new,
                     const unsigned long int *changed,
                     void *ftxn_)
{
    ovsdb_file_txn_add_row((struct ovsdb_file_txn *) ftxn_,
                           old, new, changed);
    return true;
}
574
4d0a31b6
BP
/* Adds bookkeeping members to the transaction object 'json' and returns it.
 * A null 'json' is replaced by a newly created empty object.
 *
 * "_comment" is set to 'comment' when 'comment' is nonnull, and "_date" is
 * always set to the current wall-clock time in milliseconds. */
struct json *
ovsdb_file_txn_annotate(struct json *json, const char *comment)
{
    struct json *annotated = json ? json : json_object_create();

    if (comment) {
        json_object_put_string(annotated, "_comment", comment);
    }
    json_object_put(annotated, "_date",
                    json_integer_create(time_wall_msec()));
    return annotated;
}
587
a3d573ed 588static struct ovsdb_error *
afe20d5c
BP
589ovsdb_file_commit(struct ovsdb_replica *replica,
590 const struct ovsdb_txn *txn, bool durable)
a3d573ed 591{
afe20d5c 592 struct ovsdb_file *file = ovsdb_file_cast(replica);
a3d573ed 593 struct ovsdb_file_txn ftxn;
ada496b5 594 struct ovsdb_error *error;
a3d573ed
BP
595
596 ovsdb_file_txn_init(&ftxn);
afe20d5c 597 ovsdb_txn_for_each_change(txn, ovsdb_file_change_cb, &ftxn);
a3d573ed
BP
598 if (!ftxn.json) {
599 /* Nothing to commit. */
600 return NULL;
601 }
602
ada496b5
BP
603 error = ovsdb_file_txn_commit(ftxn.json, ovsdb_txn_get_comment(txn),
604 durable, file->log);
605 if (error) {
606 return error;
607 }
608 file->n_transactions++;
609
19616e46
BP
610 /* If it has been at least COMPACT_MIN_MSEC ms since the last time we
611 * compacted (or at least COMPACT_RETRY_MSEC ms since the last time we
ada496b5 612 * tried), and if there are at least 100 transactions in the database, and
448b2003
BP
613 * if the database is at least 10 MB, and the database is at least 4x the
614 * size of the previous snapshot, then compact the database. */
615 off_t log_size = ovsdb_log_get_offset(file->log);
ada496b5
BP
616 if (time_msec() >= file->next_compact
617 && file->n_transactions >= 100
448b2003
BP
618 && log_size >= 10 * 1024 * 1024
619 && log_size / 4 >= file->snapshot_size)
ada496b5
BP
620 {
621 error = ovsdb_file_compact(file);
622 if (error) {
3865965d 623 char *s = ovsdb_error_to_string_free(error);
ada496b5 624 VLOG_WARN("%s: compacting database failed (%s), retrying in "
8c7ea6a0
BP
625 "%d seconds",
626 file->file_name, s, COMPACT_RETRY_MSEC / 1000);
ada496b5
BP
627 free(s);
628
629 file->next_compact = time_msec() + COMPACT_RETRY_MSEC;
630 }
631 }
632
633 return NULL;
634}
635
84a13f61
AS
636/* Rename 'old' to 'new', replacing 'new' if it exists. Returns NULL if
637 * successful, otherwise an ovsdb_error that the caller must destroy. */
638static struct ovsdb_error * OVS_WARN_UNUSED_RESULT
639ovsdb_rename(const char *old, const char *new)
640{
641#ifdef _WIN32
642 int error = (MoveFileEx(old, new, MOVEFILE_REPLACE_EXISTING
643 | MOVEFILE_WRITE_THROUGH | MOVEFILE_COPY_ALLOWED)
644 ? 0 : EACCES);
645#else
646 int error = rename(old, new) ? errno : 0;
647#endif
648
649 return (error
650 ? ovsdb_io_error(error, "failed to rename \"%s\" to \"%s\"",
651 old, new)
652 : NULL);
653}
654
ada496b5
BP
655struct ovsdb_error *
656ovsdb_file_compact(struct ovsdb_file *file)
657{
658 struct ovsdb_log *new_log = NULL;
659 struct lockfile *tmp_lock = NULL;
660 struct ovsdb_error *error;
661 char *tmp_name = NULL;
662 char *comment = NULL;
663 int retval;
664
665 comment = xasprintf("compacting database online "
666 "(%.3f seconds old, %u transactions, %llu bytes)",
2958f35b 667 (time_wall_msec() - file->last_compact) / 1000.0,
ada496b5
BP
668 file->n_transactions,
669 (unsigned long long) ovsdb_log_get_offset(file->log));
670 VLOG_INFO("%s: %s", file->file_name, comment);
671
672 /* Commit the old version, so that we can be assured that we'll eventually
673 * have either the old or the new version. */
674 error = ovsdb_log_commit(file->log);
675 if (error) {
676 goto exit;
677 }
678
679 /* Lock temporary file. */
680 tmp_name = xasprintf("%s.tmp", file->file_name);
4770e795 681 retval = lockfile_lock(tmp_name, &tmp_lock);
ada496b5
BP
682 if (retval) {
683 error = ovsdb_io_error(retval, "could not get lock on %s", tmp_name);
684 goto exit;
685 }
686
687 /* Remove temporary file. (It might not exist.) */
688 if (unlink(tmp_name) < 0 && errno != ENOENT) {
689 error = ovsdb_io_error(errno, "failed to remove %s", tmp_name);
690 goto exit;
691 }
692
693 /* Save a copy. */
694 error = ovsdb_file_save_copy__(tmp_name, false, comment, file->db,
695 &new_log);
696 if (error) {
697 goto exit;
698 }
699
84a13f61
AS
700 /* Replace original file by the temporary file.
701 *
702 * We support two strategies:
703 *
704 * - The preferred strategy is to rename the temporary file over the
705 * original one in-place, then close the original one. This works on
706 * Unix-like systems. It does not work on Windows, which does not
707 * allow open files to be renamed. The approach has the advantage
708 * that, at any point, we can drop back to something that already
709 * works.
710 *
711 * - Alternatively, we can close both files, rename, then open the new
712 * file (which now has the original name). This works on all
713 * systems, but if reopening the file fails then we're stuck and have
714 * to abort (XXX although it would be better to retry).
715 *
716 * We make the strategy a variable instead of an #ifdef to make it easier
717 * to test both strategies on Unix-like systems, and to make the code
718 * easier to read. */
719#ifdef _WIN32
720 bool rename_open_files = false;
721#else
722 bool rename_open_files = true;
723#endif
724 if (!rename_open_files) {
725 ovsdb_log_close(file->log);
726 ovsdb_log_close(new_log);
727 file->log = NULL;
728 new_log = NULL;
729 }
730 error = ovsdb_rename(tmp_name, file->file_name);
731 if (error) {
ada496b5
BP
732 goto exit;
733 }
84a13f61
AS
734 if (rename_open_files) {
735 fsync_parent_dir(file->file_name);
ada496b5
BP
736 ovsdb_log_close(file->log);
737 file->log = new_log;
ada496b5 738 } else {
84a13f61
AS
739 /* Re-open the log. This skips past the schema log record. */
740 error = ovsdb_file_open_log(file->file_name, OVSDB_LOG_READ_WRITE,
741 &file->log, NULL);
742 if (error) {
743 ovs_fatal(0, "could not reopen database");
744 }
745
746 /* Skip past the data log reecord. */
747 struct json *json;
748 error = ovsdb_log_read(file->log, &json);
749 if (error) {
750 ovs_fatal(0, "error reading database");
751 }
752 json_destroy(json);
753 }
754
755 /* Success! */
756 file->last_compact = time_msec();
757 file->next_compact = file->last_compact + COMPACT_MIN_MSEC;
758 file->n_transactions = 1;
759
760exit:
761 if (error) {
ada496b5
BP
762 ovsdb_log_close(new_log);
763 if (tmp_lock) {
764 unlink(tmp_name);
765 }
766 }
767
768 lockfile_unlock(tmp_lock);
769 free(tmp_name);
770 free(comment);
771
772 return error;
a3d573ed
BP
773}
774
775static void
afe20d5c 776ovsdb_file_destroy(struct ovsdb_replica *replica)
a3d573ed 777{
afe20d5c 778 struct ovsdb_file *file = ovsdb_file_cast(replica);
a3d573ed 779
afe20d5c 780 ovsdb_log_close(file->log);
ada496b5 781 free(file->file_name);
afe20d5c 782 free(file);
a3d573ed
BP
783}
784
afe20d5c
BP
785static const struct ovsdb_replica_class ovsdb_file_class = {
786 ovsdb_file_commit,
787 ovsdb_file_destroy
a3d573ed
BP
788};
789\f
790static void
791ovsdb_file_txn_init(struct ovsdb_file_txn *ftxn)
792{
793 ftxn->json = NULL;
794 ftxn->table_json = NULL;
795 ftxn->table = NULL;
796}
797
798static void
799ovsdb_file_txn_add_row(struct ovsdb_file_txn *ftxn,
800 const struct ovsdb_row *old,
17d18afb
BP
801 const struct ovsdb_row *new,
802 const unsigned long int *changed)
bd06962a 803{
bd06962a
BP
804 struct json *row;
805
806 if (!new) {
807 row = json_null_create();
808 } else {
809 struct shash_node *node;
810
88942565 811 row = old ? NULL : json_object_create();
bd06962a
BP
812 SHASH_FOR_EACH (node, &new->table->schema->columns) {
813 const struct ovsdb_column *column = node->data;
814 const struct ovsdb_type *type = &column->type;
815 unsigned int idx = column->index;
816
817 if (idx != OVSDB_COL_UUID && column->persistent
c532bf9d 818 && (old
17d18afb 819 ? bitmap_is_set(changed, idx)
c532bf9d 820 : !ovsdb_datum_is_default(&new->fields[idx], type)))
bd06962a
BP
821 {
822 if (!row) {
823 row = json_object_create();
824 }
825 json_object_put(row, column->name,
826 ovsdb_datum_to_json(&new->fields[idx], type));
827 }
828 }
829 }
830
831 if (row) {
832 struct ovsdb_table *table = new ? new->table : old->table;
833 char uuid[UUID_LEN + 1];
834
a3d573ed 835 if (table != ftxn->table) {
bd06962a 836 /* Create JSON object for transaction overall. */
a3d573ed
BP
837 if (!ftxn->json) {
838 ftxn->json = json_object_create();
bd06962a
BP
839 }
840
841 /* Create JSON object for transaction on this table. */
a3d573ed
BP
842 ftxn->table_json = json_object_create();
843 ftxn->table = table;
844 json_object_put(ftxn->json, table->schema->name, ftxn->table_json);
bd06962a
BP
845 }
846
847 /* Add row to transaction for this table. */
848 snprintf(uuid, sizeof uuid,
849 UUID_FMT, UUID_ARGS(ovsdb_row_get_uuid(new ? new : old)));
a3d573ed 850 json_object_put(ftxn->table_json, uuid, row);
bd06962a 851 }
bd06962a
BP
852}
853
/* Annotates the transaction 'json' with 'comment' and the current date,
 * appends it to 'log' and, if 'durable' is true, commits 'log' to disk.
 *
 * Takes ownership of 'json', which is destroyed before returning.
 *
 * Returns NULL on success, otherwise an ovsdb_error that says whether the
 * write or the commit was the step that failed. */
static struct ovsdb_error *
ovsdb_file_txn_commit(struct json *json, const char *comment,
                      bool durable, struct ovsdb_log *log)
{
    struct json *record = ovsdb_file_txn_annotate(json, comment);
    struct ovsdb_error *error = ovsdb_log_write(log, record);

    json_destroy(record);
    if (error) {
        return ovsdb_wrap_error(error, "writing transaction failed");
    }

    if (!durable) {
        return NULL;
    }

    error = ovsdb_log_commit(log);
    return (error
            ? ovsdb_wrap_error(error, "committing transaction failed")
            : NULL);
}