1 /* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2016 Nicira, Inc.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
27 #include "openvswitch/json.h"
30 #include "ovsdb-error.h"
32 #include "socket-util.h"
35 #include "transaction.h"
38 #include "openvswitch/vlog.h"
VLOG_DEFINE_THIS_MODULE(ovsdb_file);

/* Minimum number of milliseconds between database compactions. */
#define COMPACT_MIN_MSEC        (10 * 60 * 1000) /* 10 minutes. */

/* Minimum number of milliseconds between trying to compact the database if
 * compacting fails. */
#define COMPACT_RETRY_MSEC      (60 * 1000)      /* 1 minute. */
/* A transaction being converted to JSON for writing to a file.
 *
 * 'json' maps table names to per-table objects; 'table_json' is the object
 * for the table most recently added to, which is 'table'. */
struct ovsdb_file_txn {
    struct json *json;          /* JSON for the whole transaction. */
    struct json *table_json;    /* JSON for 'table''s transaction. */
    struct ovsdb_table *table;  /* Table described in 'table_json'. */
};
/* Forward declarations for helpers defined later in this file. */
static void ovsdb_file_txn_init(struct ovsdb_file_txn *);
static void ovsdb_file_txn_add_row(struct ovsdb_file_txn *,
                                   const struct ovsdb_row *old,
                                   const struct ovsdb_row *new,
                                   const unsigned long int *changed);
static struct ovsdb_error *ovsdb_file_txn_commit(struct json *,
                                                 const char *comment,
                                                 bool durable,
                                                 struct ovsdb_log *);

static struct ovsdb_error *ovsdb_file_open__(const char *file_name,
                                             const struct ovsdb_schema *,
                                             bool read_only, struct ovsdb **,
                                             struct ovsdb_file **);
static struct ovsdb_error *ovsdb_file_txn_from_json(
    struct ovsdb *, const struct json *, bool converting, struct ovsdb_txn **);
static struct ovsdb_error *ovsdb_file_create(struct ovsdb *,
                                             struct ovsdb_log *,
                                             const char *file_name,
                                             unsigned int n_transactions,
                                             off_t snapshot_size,
                                             struct ovsdb_file **filep);
/* Opens database 'file_name' and stores a pointer to the new database in
 * '*dbp'.  If 'read_only' is false, then the database will be locked and
 * changes to the database will be written to disk.  If 'read_only' is true,
 * the database will not be locked and changes to the database will persist
 * only as long as the "struct ovsdb".
 *
 * If 'filep' is nonnull and 'read_only' is false, then on success sets
 * '*filep' to an ovsdb_file that represents the open file.  This ovsdb_file
 * persists until '*dbp' is destroyed.
 *
 * On success, returns NULL.  On failure, returns an ovsdb_error (which the
 * caller must destroy) and sets '*dbp' and '*filep' to NULL. */
struct ovsdb_error *
ovsdb_file_open(const char *file_name, bool read_only,
                struct ovsdb **dbp, struct ovsdb_file **filep)
{
    /* A null schema means "use the schema stored in the file". */
    return ovsdb_file_open__(file_name, NULL, read_only, dbp, filep);
}
/* Opens database 'file_name' with an alternate schema.  The specified 'schema'
 * is used to interpret the data in 'file_name', ignoring the schema actually
 * stored in the file.  Data in the file for tables or columns that do not
 * exist in 'schema' are ignored, but the ovsdb file format must otherwise be
 * observed, including column constraints.
 *
 * This function can be useful for upgrading or downgrading databases to
 * "almost-compatible" formats.
 *
 * The database will not be locked.  Changes to the database will persist only
 * as long as the "struct ovsdb".
 *
 * On success, stores a pointer to the new database in '*dbp' and returns a
 * null pointer.  On failure, returns an ovsdb_error (which the caller must
 * destroy) and sets '*dbp' to NULL. */
struct ovsdb_error *
ovsdb_file_open_as_schema(const char *file_name,
                          const struct ovsdb_schema *schema,
                          struct ovsdb **dbp)
{
    /* Always read-only, so there is no ovsdb_file to hand back. */
    return ovsdb_file_open__(file_name, schema, true, dbp, NULL);
}
121 static struct ovsdb_error
*
122 ovsdb_file_open_log(const char *file_name
, enum ovsdb_log_open_mode open_mode
,
123 struct ovsdb_log
**logp
, struct ovsdb_schema
**schemap
)
125 struct ovsdb_schema
*schema
= NULL
;
126 struct ovsdb_log
*log
= NULL
;
127 struct ovsdb_error
*error
;
128 struct json
*json
= NULL
;
130 ovs_assert(logp
|| schemap
);
132 error
= ovsdb_log_open(file_name
, OVSDB_MAGIC
, open_mode
, -1, &log
);
137 error
= ovsdb_log_read(log
, &json
);
141 error
= ovsdb_io_error(EOF
, "%s: database file contains no schema",
147 error
= ovsdb_schema_from_json(json
, &schema
);
149 error
= ovsdb_wrap_error(error
,
150 "failed to parse \"%s\" as ovsdb schema",
160 ovsdb_log_close(log
);
168 ovsdb_log_close(log
);
179 static struct ovsdb_error
*
180 ovsdb_file_open__(const char *file_name
,
181 const struct ovsdb_schema
*alternate_schema
,
182 bool read_only
, struct ovsdb
**dbp
,
183 struct ovsdb_file
**filep
)
185 enum ovsdb_log_open_mode open_mode
;
186 struct ovsdb_schema
*schema
= NULL
;
187 struct ovsdb_error
*error
;
188 struct ovsdb_log
*log
;
190 struct ovsdb
*db
= NULL
;
192 /* In read-only mode there is no ovsdb_file so 'filep' must be null. */
193 ovs_assert(!(read_only
&& filep
));
195 open_mode
= read_only
? OVSDB_LOG_READ_ONLY
: OVSDB_LOG_READ_WRITE
;
196 error
= ovsdb_file_open_log(file_name
, open_mode
, &log
,
197 alternate_schema
? NULL
: &schema
);
202 db
= ovsdb_create(schema
? schema
: ovsdb_schema_clone(alternate_schema
));
204 /* When a log gets big, we compact it into a new log that initially has
205 * only a single transaction that represents the entire state of the
206 * database. Thus, we consider the first transaction in the database to be
207 * the snapshot. We measure its size to later influence the minimum log
208 * size before compacting again.
210 * The schema precedes the snapshot in the log; we could compensate for its
211 * size, but it's just not that important. */
212 off_t snapshot_size
= 0;
213 unsigned int n_transactions
= 0;
214 while ((error
= ovsdb_log_read(log
, &json
)) == NULL
&& json
) {
215 struct ovsdb_txn
*txn
;
217 error
= ovsdb_file_txn_from_json(db
, json
, alternate_schema
!= NULL
,
221 ovsdb_log_unread(log
);
226 error
= ovsdb_txn_commit(txn
, false);
228 ovsdb_log_unread(log
);
232 if (n_transactions
== 1) {
233 snapshot_size
= ovsdb_log_get_offset(log
);
237 /* Log error but otherwise ignore it. Probably the database just got
238 * truncated due to power failure etc. and we should use its current
240 char *msg
= ovsdb_error_to_string_free(error
);
246 struct ovsdb_file
*file
;
248 error
= ovsdb_file_create(db
, log
, file_name
, n_transactions
,
249 snapshot_size
, &file
);
257 ovsdb_log_close(log
);
269 ovsdb_log_close(log
);
273 static struct ovsdb_error
*
274 ovsdb_file_update_row_from_json(struct ovsdb_row
*row
, bool converting
,
275 const struct json
*json
)
277 struct ovsdb_table_schema
*schema
= row
->table
->schema
;
278 struct ovsdb_error
*error
;
279 struct shash_node
*node
;
281 if (json
->type
!= JSON_OBJECT
) {
282 return ovsdb_syntax_error(json
, NULL
, "row must be JSON object");
285 SHASH_FOR_EACH (node
, json_object(json
)) {
286 const char *column_name
= node
->name
;
287 const struct ovsdb_column
*column
;
288 struct ovsdb_datum datum
;
290 column
= ovsdb_table_schema_get_column(schema
, column_name
);
295 return ovsdb_syntax_error(json
, "unknown column",
296 "No column %s in table %s.",
297 column_name
, schema
->name
);
300 error
= ovsdb_datum_from_json(&datum
, &column
->type
, node
->data
, NULL
);
304 ovsdb_datum_swap(&row
->fields
[column
->index
], &datum
);
305 ovsdb_datum_destroy(&datum
, &column
->type
);
311 static struct ovsdb_error
*
312 ovsdb_file_txn_row_from_json(struct ovsdb_txn
*txn
, struct ovsdb_table
*table
,
314 const struct uuid
*row_uuid
, struct json
*json
)
316 const struct ovsdb_row
*row
= ovsdb_table_get_row(table
, row_uuid
);
317 if (json
->type
== JSON_NULL
) {
319 return ovsdb_syntax_error(NULL
, NULL
, "transaction deletes "
320 "row "UUID_FMT
" that does not exist",
321 UUID_ARGS(row_uuid
));
323 ovsdb_txn_row_delete(txn
, row
);
326 return ovsdb_file_update_row_from_json(ovsdb_txn_row_modify(txn
, row
),
329 struct ovsdb_error
*error
;
330 struct ovsdb_row
*new;
332 new = ovsdb_row_create(table
);
333 *ovsdb_row_get_uuid_rw(new) = *row_uuid
;
334 error
= ovsdb_file_update_row_from_json(new, converting
, json
);
336 ovsdb_row_destroy(new);
338 ovsdb_txn_row_insert(txn
, new);
344 static struct ovsdb_error
*
345 ovsdb_file_txn_table_from_json(struct ovsdb_txn
*txn
,
346 struct ovsdb_table
*table
,
347 bool converting
, struct json
*json
)
349 struct shash_node
*node
;
351 if (json
->type
!= JSON_OBJECT
) {
352 return ovsdb_syntax_error(json
, NULL
, "object expected");
355 SHASH_FOR_EACH (node
, json
->u
.object
) {
356 const char *uuid_string
= node
->name
;
357 struct json
*txn_row_json
= node
->data
;
358 struct ovsdb_error
*error
;
359 struct uuid row_uuid
;
361 if (!uuid_from_string(&row_uuid
, uuid_string
)) {
362 return ovsdb_syntax_error(json
, NULL
, "\"%s\" is not a valid UUID",
366 error
= ovsdb_file_txn_row_from_json(txn
, table
, converting
,
367 &row_uuid
, txn_row_json
);
376 /* Converts 'json' to an ovsdb_txn for 'db', storing the new transaction in
377 * '*txnp'. Returns NULL if successful, otherwise an error.
379 * If 'converting' is true, then unknown table and column names are ignored
380 * (which can ease upgrading and downgrading schemas); otherwise, they are
381 * treated as errors. */
382 static struct ovsdb_error
*
383 ovsdb_file_txn_from_json(struct ovsdb
*db
, const struct json
*json
,
384 bool converting
, struct ovsdb_txn
**txnp
)
386 struct ovsdb_error
*error
;
387 struct shash_node
*node
;
388 struct ovsdb_txn
*txn
;
392 if (json
->type
!= JSON_OBJECT
) {
393 return ovsdb_syntax_error(json
, NULL
, "object expected");
396 txn
= ovsdb_txn_create(db
);
397 SHASH_FOR_EACH (node
, json
->u
.object
) {
398 const char *table_name
= node
->name
;
399 struct json
*node_json
= node
->data
;
400 struct ovsdb_table
*table
;
402 table
= shash_find_data(&db
->tables
, table_name
);
404 if (!strcmp(table_name
, "_date")
405 && node_json
->type
== JSON_INTEGER
) {
407 } else if (!strcmp(table_name
, "_comment") || converting
) {
411 error
= ovsdb_syntax_error(json
, "unknown table",
412 "No table named %s.", table_name
);
416 error
= ovsdb_file_txn_table_from_json(txn
, table
, converting
,
426 ovsdb_txn_abort(txn
);
430 static struct ovsdb_error
*
431 ovsdb_file_save_copy__(const char *file_name
, int locking
,
432 const char *comment
, const struct ovsdb
*db
,
433 struct ovsdb_log
**logp
)
435 const struct shash_node
*node
;
436 struct ovsdb_file_txn ftxn
;
437 struct ovsdb_error
*error
;
438 struct ovsdb_log
*log
;
441 error
= ovsdb_log_open(file_name
, OVSDB_MAGIC
,
442 OVSDB_LOG_CREATE
, locking
, &log
);
448 json
= ovsdb_schema_to_json(db
->schema
);
449 error
= ovsdb_log_write(log
, json
);
456 ovsdb_file_txn_init(&ftxn
);
457 SHASH_FOR_EACH (node
, &db
->tables
) {
458 const struct ovsdb_table
*table
= node
->data
;
459 const struct ovsdb_row
*row
;
461 HMAP_FOR_EACH (row
, hmap_node
, &table
->rows
) {
462 ovsdb_file_txn_add_row(&ftxn
, NULL
, row
, NULL
);
465 error
= ovsdb_file_txn_commit(ftxn
.json
, comment
, true, log
);
476 ovsdb_log_close(log
);
/* Saves a snapshot of 'db''s current contents as 'file_name'.  If 'comment' is
 * nonnull, then it is added along with the data contents and can be viewed
 * with "ovsdb-tool show-log".
 *
 * 'locking' is passed along to ovsdb_log_open() untouched.
 *
 * Returns NULL on success, otherwise an ovsdb_error that the caller must
 * destroy. */
struct ovsdb_error *
ovsdb_file_save_copy(const char *file_name, int locking,
                     const char *comment, const struct ovsdb *db)
{
    /* A null 'logp' means the new log is closed once written. */
    return ovsdb_file_save_copy__(file_name, locking, comment, db, NULL);
}
495 /* Opens database 'file_name', reads its schema, and closes it. On success,
496 * stores the schema into '*schemap' and returns NULL; the caller then owns the
497 * schema. On failure, returns an ovsdb_error (which the caller must destroy)
498 * and sets '*dbp' to NULL. */
500 ovsdb_file_read_schema(const char *file_name
, struct ovsdb_schema
**schemap
)
502 ovs_assert(schemap
!= NULL
);
503 return ovsdb_file_open_log(file_name
, OVSDB_LOG_READ_ONLY
, NULL
, schemap
);
506 /* Replica implementation. */
509 struct ovsdb_replica replica
;
511 struct ovsdb_log
*log
;
513 long long int last_compact
;
514 long long int next_compact
;
515 unsigned int n_transactions
;
519 static const struct ovsdb_replica_class ovsdb_file_class
;
521 static struct ovsdb_error
*
522 ovsdb_file_create(struct ovsdb
*db
, struct ovsdb_log
*log
,
523 const char *file_name
,
524 unsigned int n_transactions
, off_t snapshot_size
,
525 struct ovsdb_file
**filep
)
527 struct ovsdb_file
*file
;
531 /* Use the absolute name of the file because ovsdb-server opens its
532 * database before daemonize() chdirs to "/". */
533 deref_name
= follow_symlinks(file_name
);
534 abs_name
= abs_file_name(NULL
, deref_name
);
538 return ovsdb_io_error(0, "could not determine current "
539 "working directory");
542 file
= xmalloc(sizeof *file
);
543 ovsdb_replica_init(&file
->replica
, &ovsdb_file_class
);
546 file
->file_name
= abs_name
;
547 file
->last_compact
= time_msec();
548 file
->next_compact
= file
->last_compact
+ COMPACT_MIN_MSEC
;
549 file
->snapshot_size
= snapshot_size
;
550 file
->n_transactions
= n_transactions
;
551 ovsdb_add_replica(db
, &file
->replica
);
557 static struct ovsdb_file
*
558 ovsdb_file_cast(struct ovsdb_replica
*replica
)
560 ovs_assert(replica
->class == &ovsdb_file_class
);
561 return CONTAINER_OF(replica
, struct ovsdb_file
, replica
);
/* Callback for ovsdb_txn_for_each_change(): records one row change in the
 * ovsdb_file_txn passed as 'ftxn_'.  Always returns true.
 *
 * NOTE(review): the return type and the 'void *ftxn_' parameter were dropped
 * by the extraction and are reconstructed -- confirm against the callback
 * type that ovsdb_txn_for_each_change() expects. */
static bool
ovsdb_file_change_cb(const struct ovsdb_row *old,
                     const struct ovsdb_row *new,
                     const unsigned long int *changed,
                     void *ftxn_)
{
    struct ovsdb_file_txn *ftxn = ftxn_;
    ovsdb_file_txn_add_row(ftxn, old, new, changed);
    return true;
}
/* Adds annotation members to the transaction object 'json' and returns it.
 *
 * If 'json' is null, a new empty object is created first.  If 'comment' is
 * nonnull, it is stored under "_comment".  The current wall-clock time in
 * milliseconds is always stored under "_date". */
struct json *
ovsdb_file_txn_annotate(struct json *json, const char *comment)
{
    if (!json) {
        json = json_object_create();
    }
    if (comment) {
        json_object_put_string(json, "_comment", comment);
    }
    json_object_put(json, "_date", json_integer_create(time_wall_msec()));
    return json;
}
588 static struct ovsdb_error
*
589 ovsdb_file_commit(struct ovsdb_replica
*replica
,
590 const struct ovsdb_txn
*txn
, bool durable
)
592 struct ovsdb_file
*file
= ovsdb_file_cast(replica
);
593 struct ovsdb_file_txn ftxn
;
594 struct ovsdb_error
*error
;
596 ovsdb_file_txn_init(&ftxn
);
597 ovsdb_txn_for_each_change(txn
, ovsdb_file_change_cb
, &ftxn
);
599 /* Nothing to commit. */
603 error
= ovsdb_file_txn_commit(ftxn
.json
, ovsdb_txn_get_comment(txn
),
608 file
->n_transactions
++;
610 /* If it has been at least COMPACT_MIN_MSEC ms since the last time we
611 * compacted (or at least COMPACT_RETRY_MSEC ms since the last time we
612 * tried), and if there are at least 100 transactions in the database, and
613 * if the database is at least 10 MB, and the database is at least 4x the
614 * size of the previous snapshot, then compact the database. */
615 off_t log_size
= ovsdb_log_get_offset(file
->log
);
616 if (time_msec() >= file
->next_compact
617 && file
->n_transactions
>= 100
618 && log_size
>= 10 * 1024 * 1024
619 && log_size
/ 4 >= file
->snapshot_size
)
621 error
= ovsdb_file_compact(file
);
623 char *s
= ovsdb_error_to_string_free(error
);
624 VLOG_WARN("%s: compacting database failed (%s), retrying in "
626 file
->file_name
, s
, COMPACT_RETRY_MSEC
/ 1000);
629 file
->next_compact
= time_msec() + COMPACT_RETRY_MSEC
;
636 /* Rename 'old' to 'new', replacing 'new' if it exists. Returns NULL if
637 * successful, otherwise an ovsdb_error that the caller must destroy. */
638 static struct ovsdb_error
* OVS_WARN_UNUSED_RESULT
639 ovsdb_rename(const char *old
, const char *new)
642 int error
= (MoveFileEx(old
, new, MOVEFILE_REPLACE_EXISTING
643 | MOVEFILE_WRITE_THROUGH
| MOVEFILE_COPY_ALLOWED
)
646 int error
= rename(old
, new) ? errno
: 0;
650 ? ovsdb_io_error(error
, "failed to rename \"%s\" to \"%s\"",
656 ovsdb_file_compact(struct ovsdb_file
*file
)
658 struct ovsdb_log
*new_log
= NULL
;
659 struct lockfile
*tmp_lock
= NULL
;
660 struct ovsdb_error
*error
;
661 char *tmp_name
= NULL
;
662 char *comment
= NULL
;
665 comment
= xasprintf("compacting database online "
666 "(%.3f seconds old, %u transactions, %llu bytes)",
667 (time_wall_msec() - file
->last_compact
) / 1000.0,
668 file
->n_transactions
,
669 (unsigned long long) ovsdb_log_get_offset(file
->log
));
670 VLOG_INFO("%s: %s", file
->file_name
, comment
);
672 /* Commit the old version, so that we can be assured that we'll eventually
673 * have either the old or the new version. */
674 error
= ovsdb_log_commit(file
->log
);
679 /* Lock temporary file. */
680 tmp_name
= xasprintf("%s.tmp", file
->file_name
);
681 retval
= lockfile_lock(tmp_name
, &tmp_lock
);
683 error
= ovsdb_io_error(retval
, "could not get lock on %s", tmp_name
);
687 /* Remove temporary file. (It might not exist.) */
688 if (unlink(tmp_name
) < 0 && errno
!= ENOENT
) {
689 error
= ovsdb_io_error(errno
, "failed to remove %s", tmp_name
);
694 error
= ovsdb_file_save_copy__(tmp_name
, false, comment
, file
->db
,
700 /* Replace original file by the temporary file.
702 * We support two strategies:
704 * - The preferred strategy is to rename the temporary file over the
705 * original one in-place, then close the original one. This works on
706 * Unix-like systems. It does not work on Windows, which does not
707 * allow open files to be renamed. The approach has the advantage
708 * that, at any point, we can drop back to something that already
711 * - Alternatively, we can close both files, rename, then open the new
712 * file (which now has the original name). This works on all
713 * systems, but if reopening the file fails then we're stuck and have
714 * to abort (XXX although it would be better to retry).
716 * We make the strategy a variable instead of an #ifdef to make it easier
717 * to test both strategies on Unix-like systems, and to make the code
720 bool rename_open_files
= false;
722 bool rename_open_files
= true;
724 if (!rename_open_files
) {
725 ovsdb_log_close(file
->log
);
726 ovsdb_log_close(new_log
);
730 error
= ovsdb_rename(tmp_name
, file
->file_name
);
734 if (rename_open_files
) {
735 fsync_parent_dir(file
->file_name
);
736 ovsdb_log_close(file
->log
);
739 /* Re-open the log. This skips past the schema log record. */
740 error
= ovsdb_file_open_log(file
->file_name
, OVSDB_LOG_READ_WRITE
,
743 ovs_fatal(0, "could not reopen database");
746 /* Skip past the data log reecord. */
748 error
= ovsdb_log_read(file
->log
, &json
);
750 ovs_fatal(0, "error reading database");
756 file
->last_compact
= time_msec();
757 file
->next_compact
= file
->last_compact
+ COMPACT_MIN_MSEC
;
758 file
->n_transactions
= 1;
762 ovsdb_log_close(new_log
);
768 lockfile_unlock(tmp_lock
);
776 ovsdb_file_destroy(struct ovsdb_replica
*replica
)
778 struct ovsdb_file
*file
= ovsdb_file_cast(replica
);
780 ovsdb_log_close(file
->log
);
781 free(file
->file_name
);
785 static const struct ovsdb_replica_class ovsdb_file_class
= {
791 ovsdb_file_txn_init(struct ovsdb_file_txn
*ftxn
)
794 ftxn
->table_json
= NULL
;
799 ovsdb_file_txn_add_row(struct ovsdb_file_txn
*ftxn
,
800 const struct ovsdb_row
*old
,
801 const struct ovsdb_row
*new,
802 const unsigned long int *changed
)
807 row
= json_null_create();
809 struct shash_node
*node
;
811 row
= old
? NULL
: json_object_create();
812 SHASH_FOR_EACH (node
, &new->table
->schema
->columns
) {
813 const struct ovsdb_column
*column
= node
->data
;
814 const struct ovsdb_type
*type
= &column
->type
;
815 unsigned int idx
= column
->index
;
817 if (idx
!= OVSDB_COL_UUID
&& column
->persistent
819 ? bitmap_is_set(changed
, idx
)
820 : !ovsdb_datum_is_default(&new->fields
[idx
], type
)))
823 row
= json_object_create();
825 json_object_put(row
, column
->name
,
826 ovsdb_datum_to_json(&new->fields
[idx
], type
));
832 struct ovsdb_table
*table
= new ? new->table
: old
->table
;
833 char uuid
[UUID_LEN
+ 1];
835 if (table
!= ftxn
->table
) {
836 /* Create JSON object for transaction overall. */
838 ftxn
->json
= json_object_create();
841 /* Create JSON object for transaction on this table. */
842 ftxn
->table_json
= json_object_create();
844 json_object_put(ftxn
->json
, table
->schema
->name
, ftxn
->table_json
);
847 /* Add row to transaction for this table. */
848 snprintf(uuid
, sizeof uuid
,
849 UUID_FMT
, UUID_ARGS(ovsdb_row_get_uuid(new ? new : old
)));
850 json_object_put(ftxn
->table_json
, uuid
, row
);
/* Annotates the transaction object 'json' with 'comment' and the current
 * date, writes it to 'log', and, if 'durable' is true, commits 'log' to disk.
 *
 * Takes ownership of 'json', which may be null for an empty transaction.
 *
 * Returns NULL on success, otherwise an ovsdb_error that the caller must
 * destroy.
 *
 * NOTE(review): the json_destroy() call, the 'durable' guard and the final
 * return were dropped by the extraction and are reconstructed -- confirm
 * against the pristine file. */
static struct ovsdb_error *
ovsdb_file_txn_commit(struct json *json, const char *comment,
                      bool durable, struct ovsdb_log *log)
{
    struct ovsdb_error *error;

    json = ovsdb_file_txn_annotate(json, comment);
    error = ovsdb_log_write(log, json);
    json_destroy(json);
    if (error) {
        return ovsdb_wrap_error(error, "writing transaction failed");
    }

    if (durable) {
        error = ovsdb_log_commit(log);
        if (error) {
            return ovsdb_wrap_error(error, "committing transaction failed");
        }
    }

    return NULL;
}