]> git.proxmox.com Git - ceph.git/blame - ceph/src/pmdk/src/common/shutdown_state.c
import ceph 16.2.7
[ceph.git] / ceph / src / pmdk / src / common / shutdown_state.c
CommitLineData
a4b75251
TL
1// SPDX-License-Identifier: BSD-3-Clause
2/* Copyright 2017-2020, Intel Corporation */
3
4/*
5 * shutdown_state.c -- unsafe shutdown detection
6 */
7
8#include <string.h>
9#include <stdbool.h>
10#include <endian.h>
11#include "shutdown_state.h"
12#include "out.h"
13#include "util.h"
14#include "os_deep.h"
15#include "set.h"
16#include "libpmem2.h"
17#include "badblocks.h"
18#include "../libpmem2/pmem2_utils.h"
19
/*
 * FLUSH_SDS -- flushes a shutdown_state struct via os_part_deep_common();
 * a no-op when rep is NULL
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement and cannot capture an 'else' that follows the call site.
 * Note that rep and sds are evaluated more than once.
 */
#define FLUSH_SDS(sds, rep) \
	do { \
		if ((rep) != NULL) \
			os_part_deep_common((rep), 0, (sds), \
				sizeof(*(sds)), 1); \
	} while (0)
22
23/*
24 * shutdown_state_checksum -- (internal) counts SDS checksum and flush it
25 */
26static void
27shutdown_state_checksum(struct shutdown_state *sds, struct pool_replica *rep)
28{
29 LOG(3, "sds %p", sds);
30
31 util_checksum(sds, sizeof(*sds), &sds->checksum, 1, 0);
32 FLUSH_SDS(sds, rep);
33}
34
35/*
36 * shutdown_state_init -- initializes shutdown_state struct
37 */
38int
39shutdown_state_init(struct shutdown_state *sds, struct pool_replica *rep)
40{
41 /* check if we didn't change size of shutdown_state accidentally */
42 COMPILE_ERROR_ON(sizeof(struct shutdown_state) != 64);
43 LOG(3, "sds %p", sds);
44
45 memset(sds, 0, sizeof(*sds));
46
47 shutdown_state_checksum(sds, rep);
48
49 return 0;
50}
51
52/*
53 * shutdown_state_add_part -- adds file uuid and usc to shutdown_state struct
54 *
55 * if path does not exist it will fail which does NOT mean shutdown failure
56 */
57int
58shutdown_state_add_part(struct shutdown_state *sds, int fd,
59 struct pool_replica *rep)
60{
61 LOG(3, "sds %p, fd %d", sds, fd);
62
63 size_t len = 0;
64 char *uid;
65 uint64_t usc;
66
67 struct pmem2_source *src;
68
69 if (pmem2_source_from_fd(&src, fd))
70 return 1;
71
72 int ret = pmem2_source_device_usc(src, &usc);
73
74 if (ret == PMEM2_E_NOSUPP) {
75 usc = 0;
76 } else if (ret != 0) {
77 if (ret == -EPERM) {
78 /* overwrite error message */
79 ERR(
80 "Cannot read unsafe shutdown count. For more information please check https://github.com/pmem/pmdk/issues/4207");
81 }
82 LOG(2, "cannot read unsafe shutdown count for %d", fd);
83 goto err;
84 }
85
86 ret = pmem2_source_device_id(src, NULL, &len);
87 if (ret != PMEM2_E_NOSUPP && ret != 0) {
88 ERR("cannot read uuid of %d", fd);
89 goto err;
90 }
91
92 len += 4 - len % 4;
93 uid = Zalloc(len);
94
95 if (uid == NULL) {
96 ERR("!Zalloc");
97 goto err;
98 }
99
100 ret = pmem2_source_device_id(src, uid, &len);
101 if (ret != PMEM2_E_NOSUPP && ret != 0) {
102 ERR("cannot read uuid of %d", fd);
103 Free(uid);
104 goto err;
105 }
106
107 sds->usc = htole64(le64toh(sds->usc) + usc);
108
109 uint64_t tmp;
110 util_checksum(uid, len, &tmp, 1, 0);
111 sds->uuid = htole64(le64toh(sds->uuid) + tmp);
112
113 FLUSH_SDS(sds, rep);
114 Free(uid);
115 pmem2_source_delete(&src);
116 shutdown_state_checksum(sds, rep);
117 return 0;
118err:
119 pmem2_source_delete(&src);
120 return 1;
121}
122
123/*
124 * shutdown_state_set_dirty -- sets dirty pool flag
125 */
126void
127shutdown_state_set_dirty(struct shutdown_state *sds, struct pool_replica *rep)
128{
129 LOG(3, "sds %p", sds);
130
131 sds->dirty = 1;
132 rep->part[0].sds_dirty_modified = 1;
133
134 FLUSH_SDS(sds, rep);
135
136 shutdown_state_checksum(sds, rep);
137}
138
139/*
140 * shutdown_state_clear_dirty -- clears dirty pool flag
141 */
142void
143shutdown_state_clear_dirty(struct shutdown_state *sds, struct pool_replica *rep)
144{
145 LOG(3, "sds %p", sds);
146
147 struct pool_set_part part = rep->part[0];
148 /*
149 * If a dirty flag was set in previous program execution it should be
150 * preserved as it stores information about potential ADR failure.
151 */
152 if (part.sds_dirty_modified != 1)
153 return;
154
155 sds->dirty = 0;
156 part.sds_dirty_modified = 0;
157
158 FLUSH_SDS(sds, rep);
159
160 shutdown_state_checksum(sds, rep);
161}
162
163/*
164 * shutdown_state_reinit -- (internal) reinitializes shutdown_state struct
165 */
166static void
167shutdown_state_reinit(struct shutdown_state *curr_sds,
168 struct shutdown_state *pool_sds, struct pool_replica *rep)
169{
170 LOG(3, "curr_sds %p, pool_sds %p", curr_sds, pool_sds);
171 shutdown_state_init(pool_sds, rep);
172 pool_sds->uuid = htole64(curr_sds->uuid);
173 pool_sds->usc = htole64(curr_sds->usc);
174 pool_sds->dirty = 0;
175
176 FLUSH_SDS(pool_sds, rep);
177
178 shutdown_state_checksum(pool_sds, rep);
179}
180
181/*
182 * shutdown_state_check -- compares and fixes shutdown state
183 */
184int
185shutdown_state_check(struct shutdown_state *curr_sds,
186 struct shutdown_state *pool_sds, struct pool_replica *rep)
187{
188 LOG(3, "curr_sds %p, pool_sds %p", curr_sds, pool_sds);
189
190 if (util_is_zeroed(pool_sds, sizeof(*pool_sds)) &&
191 !util_is_zeroed(curr_sds, sizeof(*curr_sds))) {
192 shutdown_state_reinit(curr_sds, pool_sds, rep);
193 return 0;
194 }
195
196 bool is_uuid_usc_correct =
197 le64toh(pool_sds->usc) == le64toh(curr_sds->usc) &&
198 le64toh(pool_sds->uuid) == le64toh(curr_sds->uuid);
199
200 bool is_checksum_correct = util_checksum(pool_sds,
201 sizeof(*pool_sds), &pool_sds->checksum, 0, 0);
202
203 int dirty = pool_sds->dirty;
204
205 if (!is_checksum_correct) {
206 /* the program was killed during opening or closing the pool */
207 LOG(2, "incorrect checksum - SDS will be reinitialized");
208 shutdown_state_reinit(curr_sds, pool_sds, rep);
209 return 0;
210 }
211
212 if (is_uuid_usc_correct) {
213 if (dirty == 0)
214 return 0;
215 /*
216 * the program was killed when the pool was opened
217 * but there wasn't an ADR failure
218 */
219 LOG(2,
220 "the pool was not closed - SDS will be reinitialized");
221 shutdown_state_reinit(curr_sds, pool_sds, rep);
222 return 0;
223 }
224 if (dirty == 0) {
225 /* an ADR failure but the pool was closed */
226 LOG(2,
227 "an ADR failure was detected but the pool was closed - SDS will be reinitialized");
228 shutdown_state_reinit(curr_sds, pool_sds, rep);
229 return 0;
230 }
231 /* an ADR failure - the pool might be corrupted */
232 ERR("an ADR failure was detected, the pool might be corrupted");
233 return 1;
234}