]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blame - drivers/block/drbd/drbd_state.c
drbd: Renamed nms to ns_max
[mirror_ubuntu-zesty-kernel.git] / drivers / block / drbd / drbd_state.c
CommitLineData
b8907339
PR
1/*
2 drbd_state.c
3
4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10 Thanks to Carter Burden, Bart Grantham and Gennadiy Nerubayev
11 from Logicworks, Inc. for making SDP replication support possible.
12
13 drbd is free software; you can redistribute it and/or modify
14 it under the terms of the GNU General Public License as published by
15 the Free Software Foundation; either version 2, or (at your option)
16 any later version.
17
18 drbd is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with drbd; see the file COPYING. If not, write to
25 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
26 */
27
28#include <linux/drbd_limits.h>
29#include "drbd_int.h"
30#include "drbd_req.h"
31
32struct after_state_chg_work {
33 struct drbd_work w;
34 union drbd_state os;
35 union drbd_state ns;
36 enum chg_state_flags flags;
37 struct completion *done;
38};
39
99920dc5 40static int w_after_state_ch(struct drbd_work *w, int unused);
b8907339
PR
41static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
42 union drbd_state ns, enum chg_state_flags flags);
0e29d163 43static void after_all_state_ch(struct drbd_tconn *tconn);
a75f34ad
PR
44static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state);
45static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state);
3509502d 46static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns);
4308a0a3
PR
47static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns,
48 const char **warn_sync_abort);
b8907339 49
2aebfabb
PR
50static inline bool is_susp(union drbd_state s)
51{
52 return s.susp || s.susp_nod || s.susp_fen;
53}
54
d0456c72 55bool conn_all_vols_unconf(struct drbd_tconn *tconn)
0e29d163
PR
56{
57 struct drbd_conf *mdev;
e90285e0 58 int vnr;
0e29d163 59
e90285e0 60 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
d0456c72
LE
61 if (mdev->state.disk != D_DISKLESS ||
62 mdev->state.conn != C_STANDALONE ||
63 mdev->state.role != R_SECONDARY)
64 return false;
0e29d163 65 }
d0456c72 66 return true;
0e29d163
PR
67}
68
cb703454
PR
69/* Unfortunately the states where not correctly ordered, when
70 they where defined. therefore can not use max_t() here. */
71static enum drbd_role max_role(enum drbd_role role1, enum drbd_role role2)
72{
73 if (role1 == R_PRIMARY || role2 == R_PRIMARY)
74 return R_PRIMARY;
75 if (role1 == R_SECONDARY || role2 == R_SECONDARY)
76 return R_SECONDARY;
77 return R_UNKNOWN;
78}
79static enum drbd_role min_role(enum drbd_role role1, enum drbd_role role2)
80{
81 if (role1 == R_UNKNOWN || role2 == R_UNKNOWN)
82 return R_UNKNOWN;
83 if (role1 == R_SECONDARY || role2 == R_SECONDARY)
84 return R_SECONDARY;
85 return R_PRIMARY;
86}
87
88enum drbd_role conn_highest_role(struct drbd_tconn *tconn)
89{
90 enum drbd_role role = R_UNKNOWN;
91 struct drbd_conf *mdev;
92 int vnr;
93
94 idr_for_each_entry(&tconn->volumes, mdev, vnr)
95 role = max_role(role, mdev->state.role);
96
97 return role;
98}
99
100enum drbd_role conn_highest_peer(struct drbd_tconn *tconn)
101{
102 enum drbd_role peer = R_UNKNOWN;
103 struct drbd_conf *mdev;
104 int vnr;
105
106 idr_for_each_entry(&tconn->volumes, mdev, vnr)
107 peer = max_role(peer, mdev->state.peer);
108
109 return peer;
110}
111
112enum drbd_disk_state conn_highest_disk(struct drbd_tconn *tconn)
113{
114 enum drbd_disk_state ds = D_DISKLESS;
115 struct drbd_conf *mdev;
116 int vnr;
117
118 idr_for_each_entry(&tconn->volumes, mdev, vnr)
119 ds = max_t(enum drbd_disk_state, ds, mdev->state.disk);
120
121 return ds;
122}
123
124enum drbd_disk_state conn_highest_pdsk(struct drbd_tconn *tconn)
125{
126 enum drbd_disk_state ds = D_DISKLESS;
127 struct drbd_conf *mdev;
128 int vnr;
129
130 idr_for_each_entry(&tconn->volumes, mdev, vnr)
131 ds = max_t(enum drbd_disk_state, ds, mdev->state.pdsk);
132
133 return ds;
134}
135
b8907339
PR
136/**
137 * cl_wide_st_chg() - true if the state change is a cluster wide one
138 * @mdev: DRBD device.
139 * @os: old (current) state.
140 * @ns: new (wanted) state.
141 */
142static int cl_wide_st_chg(struct drbd_conf *mdev,
143 union drbd_state os, union drbd_state ns)
144{
145 return (os.conn >= C_CONNECTED && ns.conn >= C_CONNECTED &&
146 ((os.role != R_PRIMARY && ns.role == R_PRIMARY) ||
147 (os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
148 (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S) ||
149 (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))) ||
150 (os.conn >= C_CONNECTED && ns.conn == C_DISCONNECTING) ||
151 (os.conn == C_CONNECTED && ns.conn == C_VERIFY_S);
152}
153
56707f9e
PR
154static union drbd_state
155apply_mask_val(union drbd_state os, union drbd_state mask, union drbd_state val)
156{
157 union drbd_state ns;
158 ns.i = (os.i & ~mask.i) | val.i;
159 return ns;
160}
161
b8907339
PR
162enum drbd_state_rv
163drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f,
164 union drbd_state mask, union drbd_state val)
165{
166 unsigned long flags;
56707f9e 167 union drbd_state ns;
b8907339
PR
168 enum drbd_state_rv rv;
169
170 spin_lock_irqsave(&mdev->tconn->req_lock, flags);
78bae59b 171 ns = apply_mask_val(drbd_read_state(mdev), mask, val);
b8907339 172 rv = _drbd_set_state(mdev, ns, f, NULL);
b8907339
PR
173 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
174
175 return rv;
176}
177
178/**
179 * drbd_force_state() - Impose a change which happens outside our control on our state
180 * @mdev: DRBD device.
181 * @mask: mask of state bits to change.
182 * @val: value of new state bits.
183 */
184void drbd_force_state(struct drbd_conf *mdev,
185 union drbd_state mask, union drbd_state val)
186{
187 drbd_change_state(mdev, CS_HARD, mask, val);
188}
189
190static enum drbd_state_rv
191_req_st_cond(struct drbd_conf *mdev, union drbd_state mask,
192 union drbd_state val)
193{
194 union drbd_state os, ns;
195 unsigned long flags;
196 enum drbd_state_rv rv;
197
198 if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &mdev->flags))
199 return SS_CW_SUCCESS;
200
201 if (test_and_clear_bit(CL_ST_CHG_FAIL, &mdev->flags))
202 return SS_CW_FAILED_BY_PEER;
203
b8907339 204 spin_lock_irqsave(&mdev->tconn->req_lock, flags);
78bae59b 205 os = drbd_read_state(mdev);
56707f9e 206 ns = sanitize_state(mdev, apply_mask_val(os, mask, val), NULL);
3509502d
PR
207 rv = is_valid_transition(os, ns);
208 if (rv == SS_SUCCESS)
209 rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */
b8907339
PR
210
211 if (!cl_wide_st_chg(mdev, os, ns))
212 rv = SS_CW_NO_NEED;
3509502d 213 if (rv == SS_UNKNOWN_ERROR) {
b8907339
PR
214 rv = is_valid_state(mdev, ns);
215 if (rv == SS_SUCCESS) {
a75f34ad 216 rv = is_valid_soft_transition(os, ns);
b8907339
PR
217 if (rv == SS_SUCCESS)
218 rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */
219 }
220 }
221 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
222
223 return rv;
224}
225
226/**
227 * drbd_req_state() - Perform an eventually cluster wide state change
228 * @mdev: DRBD device.
229 * @mask: mask of state bits to change.
230 * @val: value of new state bits.
231 * @f: flags
232 *
233 * Should not be called directly, use drbd_request_state() or
234 * _drbd_request_state().
235 */
236static enum drbd_state_rv
237drbd_req_state(struct drbd_conf *mdev, union drbd_state mask,
238 union drbd_state val, enum chg_state_flags f)
239{
240 struct completion done;
241 unsigned long flags;
242 union drbd_state os, ns;
243 enum drbd_state_rv rv;
244
245 init_completion(&done);
246
247 if (f & CS_SERIALIZE)
8410da8f 248 mutex_lock(mdev->state_mutex);
b8907339
PR
249
250 spin_lock_irqsave(&mdev->tconn->req_lock, flags);
78bae59b 251 os = drbd_read_state(mdev);
56707f9e 252 ns = sanitize_state(mdev, apply_mask_val(os, mask, val), NULL);
3509502d 253 rv = is_valid_transition(os, ns);
3c5e5f6a
LE
254 if (rv < SS_SUCCESS) {
255 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
3509502d 256 goto abort;
3c5e5f6a 257 }
b8907339
PR
258
259 if (cl_wide_st_chg(mdev, os, ns)) {
260 rv = is_valid_state(mdev, ns);
261 if (rv == SS_SUCCESS)
a75f34ad 262 rv = is_valid_soft_transition(os, ns);
b8907339
PR
263 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
264
265 if (rv < SS_SUCCESS) {
266 if (f & CS_VERBOSE)
267 print_st_err(mdev, os, ns, rv);
268 goto abort;
269 }
270
d24ae219 271 if (drbd_send_state_req(mdev, mask, val)) {
b8907339
PR
272 rv = SS_CW_FAILED_BY_PEER;
273 if (f & CS_VERBOSE)
274 print_st_err(mdev, os, ns, rv);
275 goto abort;
276 }
277
278 wait_event(mdev->state_wait,
279 (rv = _req_st_cond(mdev, mask, val)));
280
281 if (rv < SS_SUCCESS) {
b8907339
PR
282 if (f & CS_VERBOSE)
283 print_st_err(mdev, os, ns, rv);
284 goto abort;
285 }
286 spin_lock_irqsave(&mdev->tconn->req_lock, flags);
78bae59b 287 ns = apply_mask_val(drbd_read_state(mdev), mask, val);
b8907339 288 rv = _drbd_set_state(mdev, ns, f, &done);
b8907339
PR
289 } else {
290 rv = _drbd_set_state(mdev, ns, f, &done);
291 }
292
293 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
294
295 if (f & CS_WAIT_COMPLETE && rv == SS_SUCCESS) {
296 D_ASSERT(current != mdev->tconn->worker.task);
297 wait_for_completion(&done);
298 }
299
300abort:
301 if (f & CS_SERIALIZE)
8410da8f 302 mutex_unlock(mdev->state_mutex);
b8907339
PR
303
304 return rv;
305}
306
307/**
308 * _drbd_request_state() - Request a state change (with flags)
309 * @mdev: DRBD device.
310 * @mask: mask of state bits to change.
311 * @val: value of new state bits.
312 * @f: flags
313 *
314 * Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE
315 * flag, or when logging of failed state change requests is not desired.
316 */
317enum drbd_state_rv
318_drbd_request_state(struct drbd_conf *mdev, union drbd_state mask,
319 union drbd_state val, enum chg_state_flags f)
320{
321 enum drbd_state_rv rv;
322
323 wait_event(mdev->state_wait,
324 (rv = drbd_req_state(mdev, mask, val, f)) != SS_IN_TRANSIENT_STATE);
325
326 return rv;
327}
328
329static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns)
330{
331 dev_err(DEV, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c%c%c }\n",
332 name,
333 drbd_conn_str(ns.conn),
334 drbd_role_str(ns.role),
335 drbd_role_str(ns.peer),
336 drbd_disk_str(ns.disk),
337 drbd_disk_str(ns.pdsk),
338 is_susp(ns) ? 's' : 'r',
339 ns.aftr_isp ? 'a' : '-',
340 ns.peer_isp ? 'p' : '-',
341 ns.user_isp ? 'u' : '-',
342 ns.susp_fen ? 'F' : '-',
343 ns.susp_nod ? 'N' : '-'
344 );
345}
346
347void print_st_err(struct drbd_conf *mdev, union drbd_state os,
348 union drbd_state ns, enum drbd_state_rv err)
349{
350 if (err == SS_IN_TRANSIENT_STATE)
351 return;
352 dev_err(DEV, "State change failed: %s\n", drbd_set_st_err_str(err));
353 print_st(mdev, " state", os);
354 print_st(mdev, "wanted", ns);
355}
356
435693e8 357static long print_state_change(char *pb, union drbd_state os, union drbd_state ns,
bbeb641c
PR
358 enum chg_state_flags flags)
359{
435693e8 360 char *pbp;
bbeb641c
PR
361 pbp = pb;
362 *pbp = 0;
435693e8 363 if (ns.role != os.role && flags & CS_DC_ROLE)
bbeb641c
PR
364 pbp += sprintf(pbp, "role( %s -> %s ) ",
365 drbd_role_str(os.role),
366 drbd_role_str(ns.role));
435693e8 367 if (ns.peer != os.peer && flags & CS_DC_PEER)
bbeb641c
PR
368 pbp += sprintf(pbp, "peer( %s -> %s ) ",
369 drbd_role_str(os.peer),
370 drbd_role_str(ns.peer));
435693e8 371 if (ns.conn != os.conn && flags & CS_DC_CONN)
bbeb641c
PR
372 pbp += sprintf(pbp, "conn( %s -> %s ) ",
373 drbd_conn_str(os.conn),
374 drbd_conn_str(ns.conn));
435693e8 375 if (ns.disk != os.disk && flags & CS_DC_DISK)
bbeb641c
PR
376 pbp += sprintf(pbp, "disk( %s -> %s ) ",
377 drbd_disk_str(os.disk),
378 drbd_disk_str(ns.disk));
435693e8 379 if (ns.pdsk != os.pdsk && flags & CS_DC_PDSK)
bbeb641c
PR
380 pbp += sprintf(pbp, "pdsk( %s -> %s ) ",
381 drbd_disk_str(os.pdsk),
382 drbd_disk_str(ns.pdsk));
383 if (is_susp(ns) != is_susp(os))
384 pbp += sprintf(pbp, "susp( %d -> %d ) ",
385 is_susp(os),
386 is_susp(ns));
387 if (ns.aftr_isp != os.aftr_isp)
388 pbp += sprintf(pbp, "aftr_isp( %d -> %d ) ",
389 os.aftr_isp,
390 ns.aftr_isp);
391 if (ns.peer_isp != os.peer_isp)
392 pbp += sprintf(pbp, "peer_isp( %d -> %d ) ",
393 os.peer_isp,
394 ns.peer_isp);
395 if (ns.user_isp != os.user_isp)
396 pbp += sprintf(pbp, "user_isp( %d -> %d ) ",
397 os.user_isp,
398 ns.user_isp);
435693e8
PR
399
400 return pbp - pb;
401}
402
403static void drbd_pr_state_change(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns,
404 enum chg_state_flags flags)
405{
406 char pb[300];
407
408 if (print_state_change(pb, os, ns, flags ^ CS_DC_MASK))
bbeb641c
PR
409 dev_info(DEV, "%s\n", pb);
410}
b8907339 411
435693e8
PR
412static void conn_pr_state_change(struct drbd_tconn *tconn, union drbd_state os, union drbd_state ns,
413 enum chg_state_flags flags)
414{
415 char pb[300];
416
417 if (print_state_change(pb, os, ns, flags))
418 conn_info(tconn, "%s\n", pb);
419}
420
421
b8907339
PR
422/**
423 * is_valid_state() - Returns an SS_ error code if ns is not valid
424 * @mdev: DRBD device.
425 * @ns: State to consider.
426 */
427static enum drbd_state_rv
428is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
429{
430 /* See drbd_state_sw_errors in drbd_strings.c */
431
432 enum drbd_fencing_p fp;
433 enum drbd_state_rv rv = SS_SUCCESS;
434
435 fp = FP_DONT_CARE;
436 if (get_ldev(mdev)) {
437 fp = mdev->ldev->dc.fencing;
438 put_ldev(mdev);
439 }
440
441 if (get_net_conf(mdev->tconn)) {
047e95e2
PR
442 if (!mdev->tconn->net_conf->two_primaries && ns.role == R_PRIMARY) {
443 if (ns.peer == R_PRIMARY)
444 rv = SS_TWO_PRIMARIES;
cb703454 445 else if (conn_highest_peer(mdev->tconn) == R_PRIMARY)
047e95e2
PR
446 rv = SS_O_VOL_PEER_PRI;
447 }
b8907339
PR
448 put_net_conf(mdev->tconn);
449 }
450
451 if (rv <= 0)
452 /* already found a reason to abort */;
453 else if (ns.role == R_SECONDARY && mdev->open_cnt)
454 rv = SS_DEVICE_IN_USE;
455
456 else if (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.disk < D_UP_TO_DATE)
457 rv = SS_NO_UP_TO_DATE_DISK;
458
459 else if (fp >= FP_RESOURCE &&
460 ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk >= D_UNKNOWN)
461 rv = SS_PRIMARY_NOP;
462
463 else if (ns.role == R_PRIMARY && ns.disk <= D_INCONSISTENT && ns.pdsk <= D_INCONSISTENT)
464 rv = SS_NO_UP_TO_DATE_DISK;
465
466 else if (ns.conn > C_CONNECTED && ns.disk < D_INCONSISTENT)
467 rv = SS_NO_LOCAL_DISK;
468
469 else if (ns.conn > C_CONNECTED && ns.pdsk < D_INCONSISTENT)
470 rv = SS_NO_REMOTE_DISK;
471
472 else if (ns.conn > C_CONNECTED && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)
473 rv = SS_NO_UP_TO_DATE_DISK;
474
475 else if ((ns.conn == C_CONNECTED ||
476 ns.conn == C_WF_BITMAP_S ||
477 ns.conn == C_SYNC_SOURCE ||
478 ns.conn == C_PAUSED_SYNC_S) &&
479 ns.disk == D_OUTDATED)
480 rv = SS_CONNECTED_OUTDATES;
481
482 else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
f399002e 483 (mdev->tconn->net_conf->verify_alg[0] == 0))
b8907339
PR
484 rv = SS_NO_VERIFY_ALG;
485
486 else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
487 mdev->tconn->agreed_pro_version < 88)
488 rv = SS_NOT_SUPPORTED;
489
490 else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN)
491 rv = SS_CONNECTED_OUTDATES;
492
493 return rv;
494}
495
496/**
a75f34ad 497 * is_valid_soft_transition() - Returns an SS_ error code if the state transition is not possible
3509502d
PR
498 * This function limits state transitions that may be declined by DRBD. I.e.
499 * user requests (aka soft transitions).
b8907339
PR
500 * @mdev: DRBD device.
501 * @ns: new state.
502 * @os: old state.
503 */
504static enum drbd_state_rv
a75f34ad 505is_valid_soft_transition(union drbd_state os, union drbd_state ns)
b8907339
PR
506{
507 enum drbd_state_rv rv = SS_SUCCESS;
508
509 if ((ns.conn == C_STARTING_SYNC_T || ns.conn == C_STARTING_SYNC_S) &&
510 os.conn > C_CONNECTED)
511 rv = SS_RESYNC_RUNNING;
512
513 if (ns.conn == C_DISCONNECTING && os.conn == C_STANDALONE)
514 rv = SS_ALREADY_STANDALONE;
515
516 if (ns.disk > D_ATTACHING && os.disk == D_DISKLESS)
517 rv = SS_IS_DISKLESS;
518
519 if (ns.conn == C_WF_CONNECTION && os.conn < C_UNCONNECTED)
520 rv = SS_NO_NET_CONFIG;
521
522 if (ns.disk == D_OUTDATED && os.disk < D_OUTDATED && os.disk != D_ATTACHING)
523 rv = SS_LOWER_THAN_OUTDATED;
524
525 if (ns.conn == C_DISCONNECTING && os.conn == C_UNCONNECTED)
526 rv = SS_IN_TRANSIENT_STATE;
527
2325eb66
PR
528 /* if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS)
529 rv = SS_IN_TRANSIENT_STATE; */
b8907339
PR
530
531 if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED)
532 rv = SS_NEED_CONNECTION;
533
534 if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
535 ns.conn != os.conn && os.conn > C_CONNECTED)
536 rv = SS_RESYNC_RUNNING;
537
538 if ((ns.conn == C_STARTING_SYNC_S || ns.conn == C_STARTING_SYNC_T) &&
539 os.conn < C_CONNECTED)
540 rv = SS_NEED_CONNECTION;
541
542 if ((ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)
543 && os.conn < C_WF_REPORT_PARAMS)
544 rv = SS_NEED_CONNECTION; /* No NetworkFailure -> SyncTarget etc... */
545
546 return rv;
547}
548
3509502d 549static enum drbd_state_rv
fda74117 550is_valid_conn_transition(enum drbd_conns oc, enum drbd_conns nc)
3509502d
PR
551{
552 enum drbd_state_rv rv = SS_SUCCESS;
553
554 /* Disallow Network errors to configure a device's network part */
fda74117 555 if ((nc >= C_TIMEOUT && nc <= C_TEAR_DOWN) && oc <= C_DISCONNECTING)
3509502d
PR
556 rv = SS_NEED_CONNECTION;
557
558 /* After a network error only C_UNCONNECTED or C_DISCONNECTING may follow. */
fda74117 559 if (oc >= C_TIMEOUT && oc <= C_TEAR_DOWN && nc != C_UNCONNECTED && nc != C_DISCONNECTING)
3509502d
PR
560 rv = SS_IN_TRANSIENT_STATE;
561
562 /* After C_DISCONNECTING only C_STANDALONE may follow */
fda74117 563 if (oc == C_DISCONNECTING && nc != C_STANDALONE)
3509502d
PR
564 rv = SS_IN_TRANSIENT_STATE;
565
fda74117
PR
566 return rv;
567}
568
569
570/**
571 * is_valid_transition() - Returns an SS_ error code if the state transition is not possible
572 * This limits hard state transitions. Hard state transitions are facts there are
573 * imposed on DRBD by the environment. E.g. disk broke or network broke down.
574 * But those hard state transitions are still not allowed to do everything.
575 * @ns: new state.
576 * @os: old state.
577 */
578static enum drbd_state_rv
579is_valid_transition(union drbd_state os, union drbd_state ns)
580{
581 enum drbd_state_rv rv;
582
583 rv = is_valid_conn_transition(os.conn, ns.conn);
584
3509502d
PR
585 /* we cannot fail (again) if we already detached */
586 if (ns.disk == D_FAILED && os.disk == D_DISKLESS)
587 rv = SS_IS_DISKLESS;
588
4308a0a3
PR
589 /* if we are only D_ATTACHING yet,
590 * we can (and should) go directly to D_DISKLESS. */
591 if (ns.disk == D_FAILED && os.disk == D_ATTACHING) {
592 printk("TODO: FIX ME\n");
593 rv = SS_IS_DISKLESS;
594 }
595
3509502d
PR
596 return rv;
597}
598
b8907339
PR
599/**
600 * sanitize_state() - Resolves implicitly necessary additional changes to a state transition
601 * @mdev: DRBD device.
602 * @os: old state.
603 * @ns: new state.
604 * @warn_sync_abort:
605 *
606 * When we loose connection, we have to set the state of the peers disk (pdsk)
607 * to D_UNKNOWN. This rule and many more along those lines are in this function.
608 */
4308a0a3
PR
609static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns,
610 const char **warn_sync_abort)
b8907339
PR
611{
612 enum drbd_fencing_p fp;
613 enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max;
614
615 fp = FP_DONT_CARE;
616 if (get_ldev(mdev)) {
617 fp = mdev->ldev->dc.fencing;
618 put_ldev(mdev);
619 }
620
3509502d 621 /* Implications from connection to peer and peer_isp */
b8907339
PR
622 if (ns.conn < C_CONNECTED) {
623 ns.peer_isp = 0;
624 ns.peer = R_UNKNOWN;
625 if (ns.pdsk > D_UNKNOWN || ns.pdsk < D_INCONSISTENT)
626 ns.pdsk = D_UNKNOWN;
627 }
628
629 /* Clear the aftr_isp when becoming unconfigured */
630 if (ns.conn == C_STANDALONE && ns.disk == D_DISKLESS && ns.role == R_SECONDARY)
631 ns.aftr_isp = 0;
632
4308a0a3 633 /* An implication of the disk states onto the connection state */
b8907339 634 /* Abort resync if a disk fails/detaches */
4308a0a3 635 if (ns.conn > C_CONNECTED && (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) {
b8907339
PR
636 if (warn_sync_abort)
637 *warn_sync_abort =
4308a0a3 638 ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T ?
b8907339
PR
639 "Online-verify" : "Resync";
640 ns.conn = C_CONNECTED;
641 }
642
643 /* Connection breaks down before we finished "Negotiating" */
644 if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING &&
645 get_ldev_if_state(mdev, D_NEGOTIATING)) {
646 if (mdev->ed_uuid == mdev->ldev->md.uuid[UI_CURRENT]) {
647 ns.disk = mdev->new_state_tmp.disk;
648 ns.pdsk = mdev->new_state_tmp.pdsk;
649 } else {
650 dev_alert(DEV, "Connection lost while negotiating, no data!\n");
651 ns.disk = D_DISKLESS;
652 ns.pdsk = D_UNKNOWN;
653 }
654 put_ldev(mdev);
655 }
656
657 /* D_CONSISTENT and D_OUTDATED vanish when we get connected */
658 if (ns.conn >= C_CONNECTED && ns.conn < C_AHEAD) {
659 if (ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED)
660 ns.disk = D_UP_TO_DATE;
661 if (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED)
662 ns.pdsk = D_UP_TO_DATE;
663 }
664
665 /* Implications of the connection stat on the disk states */
666 disk_min = D_DISKLESS;
667 disk_max = D_UP_TO_DATE;
668 pdsk_min = D_INCONSISTENT;
669 pdsk_max = D_UNKNOWN;
670 switch ((enum drbd_conns)ns.conn) {
671 case C_WF_BITMAP_T:
672 case C_PAUSED_SYNC_T:
673 case C_STARTING_SYNC_T:
674 case C_WF_SYNC_UUID:
675 case C_BEHIND:
676 disk_min = D_INCONSISTENT;
677 disk_max = D_OUTDATED;
678 pdsk_min = D_UP_TO_DATE;
679 pdsk_max = D_UP_TO_DATE;
680 break;
681 case C_VERIFY_S:
682 case C_VERIFY_T:
683 disk_min = D_UP_TO_DATE;
684 disk_max = D_UP_TO_DATE;
685 pdsk_min = D_UP_TO_DATE;
686 pdsk_max = D_UP_TO_DATE;
687 break;
688 case C_CONNECTED:
689 disk_min = D_DISKLESS;
690 disk_max = D_UP_TO_DATE;
691 pdsk_min = D_DISKLESS;
692 pdsk_max = D_UP_TO_DATE;
693 break;
694 case C_WF_BITMAP_S:
695 case C_PAUSED_SYNC_S:
696 case C_STARTING_SYNC_S:
697 case C_AHEAD:
698 disk_min = D_UP_TO_DATE;
699 disk_max = D_UP_TO_DATE;
700 pdsk_min = D_INCONSISTENT;
701 pdsk_max = D_CONSISTENT; /* D_OUTDATED would be nice. But explicit outdate necessary*/
702 break;
703 case C_SYNC_TARGET:
704 disk_min = D_INCONSISTENT;
705 disk_max = D_INCONSISTENT;
706 pdsk_min = D_UP_TO_DATE;
707 pdsk_max = D_UP_TO_DATE;
708 break;
709 case C_SYNC_SOURCE:
710 disk_min = D_UP_TO_DATE;
711 disk_max = D_UP_TO_DATE;
712 pdsk_min = D_INCONSISTENT;
713 pdsk_max = D_INCONSISTENT;
714 break;
715 case C_STANDALONE:
716 case C_DISCONNECTING:
717 case C_UNCONNECTED:
718 case C_TIMEOUT:
719 case C_BROKEN_PIPE:
720 case C_NETWORK_FAILURE:
721 case C_PROTOCOL_ERROR:
722 case C_TEAR_DOWN:
723 case C_WF_CONNECTION:
724 case C_WF_REPORT_PARAMS:
725 case C_MASK:
726 break;
727 }
728 if (ns.disk > disk_max)
729 ns.disk = disk_max;
730
731 if (ns.disk < disk_min) {
732 dev_warn(DEV, "Implicitly set disk from %s to %s\n",
733 drbd_disk_str(ns.disk), drbd_disk_str(disk_min));
734 ns.disk = disk_min;
735 }
736 if (ns.pdsk > pdsk_max)
737 ns.pdsk = pdsk_max;
738
739 if (ns.pdsk < pdsk_min) {
740 dev_warn(DEV, "Implicitly set pdsk from %s to %s\n",
741 drbd_disk_str(ns.pdsk), drbd_disk_str(pdsk_min));
742 ns.pdsk = pdsk_min;
743 }
744
745 if (fp == FP_STONITH &&
4308a0a3 746 (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED))
b8907339
PR
747 ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */
748
f399002e 749 if (mdev->tconn->res_opts.on_no_data == OND_SUSPEND_IO &&
4308a0a3 750 (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE))
b8907339
PR
751 ns.susp_nod = 1; /* Suspend IO while no data available (no accessible data available) */
752
753 if (ns.aftr_isp || ns.peer_isp || ns.user_isp) {
754 if (ns.conn == C_SYNC_SOURCE)
755 ns.conn = C_PAUSED_SYNC_S;
756 if (ns.conn == C_SYNC_TARGET)
757 ns.conn = C_PAUSED_SYNC_T;
758 } else {
759 if (ns.conn == C_PAUSED_SYNC_S)
760 ns.conn = C_SYNC_SOURCE;
761 if (ns.conn == C_PAUSED_SYNC_T)
762 ns.conn = C_SYNC_TARGET;
763 }
764
765 return ns;
766}
767
768void drbd_resume_al(struct drbd_conf *mdev)
769{
770 if (test_and_clear_bit(AL_SUSPENDED, &mdev->flags))
771 dev_info(DEV, "Resumed AL updates\n");
772}
773
774/* helper for __drbd_set_state */
775static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs)
776{
777 if (mdev->tconn->agreed_pro_version < 90)
778 mdev->ov_start_sector = 0;
779 mdev->rs_total = drbd_bm_bits(mdev);
780 mdev->ov_position = 0;
781 if (cs == C_VERIFY_T) {
782 /* starting online verify from an arbitrary position
783 * does not fit well into the existing protocol.
784 * on C_VERIFY_T, we initialize ov_left and friends
785 * implicitly in receive_DataRequest once the
786 * first P_OV_REQUEST is received */
787 mdev->ov_start_sector = ~(sector_t)0;
788 } else {
789 unsigned long bit = BM_SECT_TO_BIT(mdev->ov_start_sector);
790 if (bit >= mdev->rs_total) {
791 mdev->ov_start_sector =
792 BM_BIT_TO_SECT(mdev->rs_total - 1);
793 mdev->rs_total = 1;
794 } else
795 mdev->rs_total -= bit;
796 mdev->ov_position = mdev->ov_start_sector;
797 }
798 mdev->ov_left = mdev->rs_total;
799}
800
801/**
802 * __drbd_set_state() - Set a new DRBD state
803 * @mdev: DRBD device.
804 * @ns: new state.
805 * @flags: Flags
806 * @done: Optional completion, that will get completed after the after_state_ch() finished
807 *
808 * Caller needs to hold req_lock, and global_state_lock. Do not call directly.
809 */
810enum drbd_state_rv
811__drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
812 enum chg_state_flags flags, struct completion *done)
813{
814 union drbd_state os;
815 enum drbd_state_rv rv = SS_SUCCESS;
816 const char *warn_sync_abort = NULL;
817 struct after_state_chg_work *ascw;
818
78bae59b 819 os = drbd_read_state(mdev);
b8907339 820
4308a0a3 821 ns = sanitize_state(mdev, ns, &warn_sync_abort);
b8907339
PR
822 if (ns.i == os.i)
823 return SS_NOTHING_TO_DO;
824
3509502d
PR
825 rv = is_valid_transition(os, ns);
826 if (rv < SS_SUCCESS)
827 return rv;
828
b8907339
PR
829 if (!(flags & CS_HARD)) {
830 /* pre-state-change checks ; only look at ns */
831 /* See drbd_state_sw_errors in drbd_strings.c */
832
833 rv = is_valid_state(mdev, ns);
834 if (rv < SS_SUCCESS) {
835 /* If the old state was illegal as well, then let
836 this happen...*/
837
838 if (is_valid_state(mdev, os) == rv)
a75f34ad 839 rv = is_valid_soft_transition(os, ns);
b8907339 840 } else
a75f34ad 841 rv = is_valid_soft_transition(os, ns);
b8907339
PR
842 }
843
844 if (rv < SS_SUCCESS) {
845 if (flags & CS_VERBOSE)
846 print_st_err(mdev, os, ns, rv);
847 return rv;
848 }
849
850 if (warn_sync_abort)
851 dev_warn(DEV, "%s aborted.\n", warn_sync_abort);
852
435693e8 853 drbd_pr_state_change(mdev, os, ns, flags);
b8907339 854
b8907339
PR
855 /* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference
856 * on the ldev here, to be sure the transition -> D_DISKLESS resp.
857 * drbd_ldev_destroy() won't happen before our corresponding
858 * after_state_ch works run, where we put_ldev again. */
859 if ((os.disk != D_FAILED && ns.disk == D_FAILED) ||
860 (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))
861 atomic_inc(&mdev->local_cnt);
862
da9fbc27 863 mdev->state.i = ns.i;
8e0af25f
PR
864 mdev->tconn->susp = ns.susp;
865 mdev->tconn->susp_nod = ns.susp_nod;
866 mdev->tconn->susp_fen = ns.susp_fen;
b8907339 867
0e29d163
PR
868 /* solve the race between becoming unconfigured,
869 * worker doing the cleanup, and
870 * admin reconfiguring us:
871 * on (re)configure, first set CONFIG_PENDING,
872 * then wait for a potentially exiting worker,
873 * start the worker, and schedule one no_op.
874 * then proceed with configuration.
875 */
876 if(conn_all_vols_unconf(mdev->tconn) &&
877 !test_and_set_bit(CONFIG_PENDING, &mdev->tconn->flags))
878 set_bit(OBJECT_DYING, &mdev->tconn->flags);
879
b8907339
PR
880 if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING)
881 drbd_print_uuids(mdev, "attached to UUIDs");
882
883 wake_up(&mdev->misc_wait);
884 wake_up(&mdev->state_wait);
2a67d8b9 885 wake_up(&mdev->tconn->ping_wait);
b8907339
PR
886
887 /* aborted verify run. log the last position */
888 if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) &&
889 ns.conn < C_CONNECTED) {
890 mdev->ov_start_sector =
891 BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left);
892 dev_info(DEV, "Online Verify reached sector %llu\n",
893 (unsigned long long)mdev->ov_start_sector);
894 }
895
896 if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) &&
897 (ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)) {
898 dev_info(DEV, "Syncer continues.\n");
899 mdev->rs_paused += (long)jiffies
900 -(long)mdev->rs_mark_time[mdev->rs_last_mark];
901 if (ns.conn == C_SYNC_TARGET)
902 mod_timer(&mdev->resync_timer, jiffies);
903 }
904
905 if ((os.conn == C_SYNC_TARGET || os.conn == C_SYNC_SOURCE) &&
906 (ns.conn == C_PAUSED_SYNC_T || ns.conn == C_PAUSED_SYNC_S)) {
907 dev_info(DEV, "Resync suspended\n");
908 mdev->rs_mark_time[mdev->rs_last_mark] = jiffies;
909 }
910
911 if (os.conn == C_CONNECTED &&
912 (ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T)) {
913 unsigned long now = jiffies;
914 int i;
915
916 set_ov_position(mdev, ns.conn);
917 mdev->rs_start = now;
918 mdev->rs_last_events = 0;
919 mdev->rs_last_sect_ev = 0;
920 mdev->ov_last_oos_size = 0;
921 mdev->ov_last_oos_start = 0;
922
923 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
924 mdev->rs_mark_left[i] = mdev->ov_left;
925 mdev->rs_mark_time[i] = now;
926 }
927
928 drbd_rs_controller_reset(mdev);
929
930 if (ns.conn == C_VERIFY_S) {
931 dev_info(DEV, "Starting Online Verify from sector %llu\n",
932 (unsigned long long)mdev->ov_position);
933 mod_timer(&mdev->resync_timer, jiffies);
934 }
935 }
936
937 if (get_ldev(mdev)) {
938 u32 mdf = mdev->ldev->md.flags & ~(MDF_CONSISTENT|MDF_PRIMARY_IND|
939 MDF_CONNECTED_IND|MDF_WAS_UP_TO_DATE|
940 MDF_PEER_OUT_DATED|MDF_CRASHED_PRIMARY);
941
942 if (test_bit(CRASHED_PRIMARY, &mdev->flags))
943 mdf |= MDF_CRASHED_PRIMARY;
944 if (mdev->state.role == R_PRIMARY ||
945 (mdev->state.pdsk < D_INCONSISTENT && mdev->state.peer == R_PRIMARY))
946 mdf |= MDF_PRIMARY_IND;
947 if (mdev->state.conn > C_WF_REPORT_PARAMS)
948 mdf |= MDF_CONNECTED_IND;
949 if (mdev->state.disk > D_INCONSISTENT)
950 mdf |= MDF_CONSISTENT;
951 if (mdev->state.disk > D_OUTDATED)
952 mdf |= MDF_WAS_UP_TO_DATE;
953 if (mdev->state.pdsk <= D_OUTDATED && mdev->state.pdsk >= D_INCONSISTENT)
954 mdf |= MDF_PEER_OUT_DATED;
955 if (mdf != mdev->ldev->md.flags) {
956 mdev->ldev->md.flags = mdf;
957 drbd_md_mark_dirty(mdev);
958 }
959 if (os.disk < D_CONSISTENT && ns.disk >= D_CONSISTENT)
960 drbd_set_ed_uuid(mdev, mdev->ldev->md.uuid[UI_CURRENT]);
961 put_ldev(mdev);
962 }
963
964 /* Peer was forced D_UP_TO_DATE & R_PRIMARY, consider to resync */
965 if (os.disk == D_INCONSISTENT && os.pdsk == D_INCONSISTENT &&
966 os.peer == R_SECONDARY && ns.peer == R_PRIMARY)
967 set_bit(CONSIDER_RESYNC, &mdev->flags);
968
969 /* Receiver should clean up itself */
970 if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING)
971 drbd_thread_stop_nowait(&mdev->tconn->receiver);
972
973 /* Now the receiver finished cleaning up itself, it should die */
974 if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE)
975 drbd_thread_stop_nowait(&mdev->tconn->receiver);
976
977 /* Upon network failure, we need to restart the receiver. */
978 if (os.conn > C_TEAR_DOWN &&
979 ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT)
980 drbd_thread_restart_nowait(&mdev->tconn->receiver);
981
982 /* Resume AL writing if we get a connection */
983 if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
984 drbd_resume_al(mdev);
985
986 ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC);
987 if (ascw) {
988 ascw->os = os;
989 ascw->ns = ns;
990 ascw->flags = flags;
991 ascw->w.cb = w_after_state_ch;
a21e9298 992 ascw->w.mdev = mdev;
b8907339
PR
993 ascw->done = done;
994 drbd_queue_work(&mdev->tconn->data.work, &ascw->w);
995 } else {
bbeb641c 996 dev_err(DEV, "Could not kmalloc an ascw\n");
b8907339
PR
997 }
998
999 return rv;
1000}
1001
99920dc5 1002static int w_after_state_ch(struct drbd_work *w, int unused)
b8907339
PR
1003{
1004 struct after_state_chg_work *ascw =
1005 container_of(w, struct after_state_chg_work, w);
00d56944 1006 struct drbd_conf *mdev = w->mdev;
b8907339
PR
1007
1008 after_state_ch(mdev, ascw->os, ascw->ns, ascw->flags);
1009 if (ascw->flags & CS_WAIT_COMPLETE) {
1010 D_ASSERT(ascw->done != NULL);
1011 complete(ascw->done);
1012 }
1013 kfree(ascw);
1014
99920dc5 1015 return 0;
b8907339
PR
1016}
1017
1018static void abw_start_sync(struct drbd_conf *mdev, int rv)
1019{
1020 if (rv) {
1021 dev_err(DEV, "Writing the bitmap failed not starting resync.\n");
1022 _drbd_request_state(mdev, NS(conn, C_CONNECTED), CS_VERBOSE);
1023 return;
1024 }
1025
1026 switch (mdev->state.conn) {
1027 case C_STARTING_SYNC_T:
1028 _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
1029 break;
1030 case C_STARTING_SYNC_S:
1031 drbd_start_resync(mdev, C_SYNC_SOURCE);
1032 break;
1033 }
1034}
1035
/**
 * drbd_bitmap_io_from_worker() - Run a bitmap I/O function from the worker
 * @mdev:	DRBD device.
 * @io_fn:	Bitmap I/O function, executed while the bitmap is locked.
 * @why:	Reason string handed to drbd_bm_lock() for diagnostics.
 * @flags:	Bitmap locking flags (enum bm_flag).
 *
 * Must be called from the worker thread (asserted below).  Application
 * I/O is suspended around the bitmap operation and resumed afterwards.
 *
 * Returns the value of @io_fn.
 */
int drbd_bitmap_io_from_worker(struct drbd_conf *mdev,
		int (*io_fn)(struct drbd_conf *),
		char *why, enum bm_flag flags)
{
	int rv;

	D_ASSERT(current == mdev->tconn->worker.task);

	/* open coded non-blocking drbd_suspend_io(mdev);
	 * (only sets the flag; does not wait for in-flight I/O) */
	set_bit(SUSPEND_IO, &mdev->flags);

	drbd_bm_lock(mdev, why, flags);
	rv = io_fn(mdev);
	drbd_bm_unlock(mdev);

	drbd_resume_io(mdev);

	return rv;
}
1055
/**
 * after_state_ch() - Perform after state change actions that may sleep
 * @mdev:	DRBD device.
 * @os:	old state.
 * @ns:	new state.
 * @flags:	Flags
 *
 * Runs in worker context (queued via w_after_state_ch); unlike
 * __drbd_set_state() it may sleep, send packets, and run user helpers.
 */
static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
			   union drbd_state ns, enum chg_state_flags flags)
{
	enum drbd_fencing_p fp;
	enum drbd_req_event what = NOTHING;
	/* nsm: state change to apply once frozen requests were dealt with.
	 * Starts as "all bits set" and is masked with the current state
	 * below, so only the susp_* bits cleared here take effect. */
	union drbd_state nsm;
	struct sib_info sib;

	sib.sib_reason = SIB_STATE_CHANGE;
	sib.os = os;
	sib.ns = ns;

	/* Connection newly established: forget about a crashed primary. */
	if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) {
		clear_bit(CRASHED_PRIMARY, &mdev->flags);
		if (mdev->p_uuid)
			mdev->p_uuid[UI_FLAGS] &= ~((u64)2);
	}

	fp = FP_DONT_CARE;
	if (get_ldev(mdev)) {
		fp = mdev->ldev->dc.fencing;
		put_ldev(mdev);
	}
	/* NOTE(review): fp is fetched but not read anywhere below in this
	 * function — TODO confirm whether the fencing check was removed
	 * intentionally. */

	/* Inform userspace about the change... */
	drbd_bcast_event(mdev, &sib);

	/* Became primary while inconsistent and peer's disk is not good
	 * either: run the pri-on-incon-degr handler (only on the edge). */
	if (!(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE) &&
	    (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE))
		drbd_khelper(mdev, "pri-on-incon-degr");

	/* Here we have the actions that are performed after a
	   state change. This function might sleep */

	nsm.i = -1;
	if (ns.susp_nod) {
		if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
			what = RESEND;

		if (os.disk == D_ATTACHING && ns.disk > D_ATTACHING)
			what = RESTART_FROZEN_DISK_IO;

		if (what != NOTHING)
			nsm.susp_nod = 0;
	}

	if (ns.susp_fen) {
		/* case1: The outdate peer handler is successful: */
		if (os.pdsk > D_OUTDATED && ns.pdsk <= D_OUTDATED) {
			tl_clear(mdev->tconn);
			if (test_bit(NEW_CUR_UUID, &mdev->flags)) {
				drbd_uuid_new_current(mdev);
				clear_bit(NEW_CUR_UUID, &mdev->flags);
			}
			spin_lock_irq(&mdev->tconn->req_lock);
			_drbd_set_state(_NS(mdev, susp_fen, 0), CS_VERBOSE, NULL);
			spin_unlock_irq(&mdev->tconn->req_lock);
		}
		/* case2: The connection was established again: */
		if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) {
			clear_bit(NEW_CUR_UUID, &mdev->flags);
			what = RESEND;
			nsm.susp_fen = 0;
		}
	}

	if (what != NOTHING) {
		spin_lock_irq(&mdev->tconn->req_lock);
		_tl_restart(mdev->tconn, what);
		/* Apply only the susp_* bits cleared above; everything else
		 * is masked to the device's current state. */
		nsm.i &= mdev->state.i;
		_drbd_set_state(mdev, nsm, CS_VERBOSE, NULL);
		spin_unlock_irq(&mdev->tconn->req_lock);
	}

	/* Became sync source. With protocol >= 96, we still need to send out
	 * the sync uuid now. Need to do that before any drbd_send_state, or
	 * the other side may go "paused sync" before receiving the sync uuids,
	 * which is unexpected. */
	if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) &&
	    (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) &&
	    mdev->tconn->agreed_pro_version >= 96 && get_ldev(mdev)) {
		drbd_gen_and_send_sync_uuid(mdev);
		put_ldev(mdev);
	}

	/* Do not change the order of the if above and the two below... */
	if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) {      /* attach on the peer */
		drbd_send_uuids(mdev);
		drbd_send_state(mdev);
	}
	/* No point in queuing send_bitmap if we don't have a connection
	 * anymore, so check also the _current_ state, not only the new state
	 * at the time this work was queued. */
	if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S &&
	    mdev->state.conn == C_WF_BITMAP_S)
		drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL,
				"send_bitmap (WFBitMapS)",
				BM_LOCKED_TEST_ALLOWED);

	/* Lost contact to peer's copy of the data */
	if ((os.pdsk >= D_INCONSISTENT &&
	     os.pdsk != D_UNKNOWN &&
	     os.pdsk != D_OUTDATED)
	&&  (ns.pdsk < D_INCONSISTENT ||
	     ns.pdsk == D_UNKNOWN ||
	     ns.pdsk == D_OUTDATED)) {
		if (get_ldev(mdev)) {
			if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) &&
			    mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
				if (drbd_suspended(mdev)) {
					/* IO is frozen: defer the new UUID
					 * until we are unfrozen again. */
					set_bit(NEW_CUR_UUID, &mdev->flags);
				} else {
					drbd_uuid_new_current(mdev);
					drbd_send_uuids(mdev);
				}
			}
			put_ldev(mdev);
		}
	}

	if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) {
		if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0) {
			drbd_uuid_new_current(mdev);
			drbd_send_uuids(mdev);
		}

		/* D_DISKLESS Peer becomes secondary */
		if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
			/* We may still be Primary ourselves.
			 * No harm done if the bitmap still changes,
			 * redirtied pages will follow later. */
			drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
				"demote diskless peer", BM_LOCKED_SET_ALLOWED);
		put_ldev(mdev);
	}

	/* Write out all changed bits on demote.
	 * Though, no need to do that just yet
	 * if there is a resync going on still */
	if (os.role == R_PRIMARY && ns.role == R_SECONDARY &&
		mdev->state.conn <= C_CONNECTED && get_ldev(mdev)) {
		/* No changes to the bitmap expected this time, so assert that,
		 * even though no harm was done if it did change. */
		drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
				"demote", BM_LOCKED_TEST_ALLOWED);
		put_ldev(mdev);
	}

	/* Last part of the attaching process ... */
	if (ns.conn >= C_CONNECTED &&
	    os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) {
		drbd_send_sizes(mdev, 0, 0);  /* to start sync... */
		drbd_send_uuids(mdev);
		drbd_send_state(mdev);
	}

	/* We want to pause/continue resync, tell peer. */
	if (ns.conn >= C_CONNECTED &&
	     ((os.aftr_isp != ns.aftr_isp) ||
	      (os.user_isp != ns.user_isp)))
		drbd_send_state(mdev);

	/* In case one of the isp bits got set, suspend other devices. */
	if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) &&
	    (ns.aftr_isp || ns.peer_isp || ns.user_isp))
		suspend_other_sg(mdev);

	/* Make sure the peer gets informed about eventual state
	   changes (ISP bits) while we were in WFReportParams. */
	if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED)
		drbd_send_state(mdev);

	if (os.conn != C_AHEAD && ns.conn == C_AHEAD)
		drbd_send_state(mdev);

	/* We are in the progress to start a full sync... */
	if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
	    (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S))
		/* no other bitmap changes expected during this phase */
		drbd_queue_bitmap_io(mdev,
			&drbd_bmio_set_n_write, &abw_start_sync,
			"set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED);

	/* We are invalidating our self... */
	if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED &&
	    os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT)
		/* other bitmap operation expected during this phase */
		drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL,
			"set_n_write from invalidate", BM_LOCKED_MASK);

	/* first half of local IO error, failure to attach,
	 * or administrative detach */
	if (os.disk != D_FAILED && ns.disk == D_FAILED) {
		enum drbd_io_error_p eh;
		int was_io_error;
		/* corresponding get_ldev was in __drbd_set_state, to serialize
		 * our cleanup here with the transition to D_DISKLESS,
		 * so it is safe to dereference ldev here. */
		eh = mdev->ldev->dc.on_io_error;
		was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags);

		/* current state still has to be D_FAILED,
		 * there is only one way out: to D_DISKLESS,
		 * and that may only happen after our put_ldev below. */
		if (mdev->state.disk != D_FAILED)
			dev_err(DEV,
				"ASSERT FAILED: disk is %s during detach\n",
				drbd_disk_str(mdev->state.disk));

		if (!drbd_send_state(mdev))
			dev_warn(DEV, "Notified peer that I am detaching my disk\n");
		else
			dev_err(DEV, "Sending state for detaching disk failed\n");

		drbd_rs_cancel_all(mdev);

		/* In case we want to get something to stable storage still,
		 * this may be the last chance.
		 * Following put_ldev may transition to D_DISKLESS. */
		drbd_md_sync(mdev);
		put_ldev(mdev);

		if (was_io_error && eh == EP_CALL_HELPER)
			drbd_khelper(mdev, "local-io-error");
	}

        /* second half of local IO error, failure to attach,
         * or administrative detach,
         * after local_cnt references have reached zero again */
        if (os.disk != D_DISKLESS && ns.disk == D_DISKLESS) {
                /* We must still be diskless,
                 * re-attach has to be serialized with this! */
                if (mdev->state.disk != D_DISKLESS)
                        dev_err(DEV,
                                "ASSERT FAILED: disk is %s while going diskless\n",
                                drbd_disk_str(mdev->state.disk));

                mdev->rs_total = 0;
                mdev->rs_failed = 0;
                atomic_set(&mdev->rs_pending_cnt, 0);

		if (!drbd_send_state(mdev))
			dev_warn(DEV, "Notified peer that I'm now diskless.\n");
		/* corresponding get_ldev in __drbd_set_state
		 * this may finally trigger drbd_ldev_destroy. */
		put_ldev(mdev);
	}

	/* Notify peer that I had a local IO error, and did not detach. */
	if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT)
		drbd_send_state(mdev);

	/* Disks got bigger while they were detached */
	if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING &&
	    test_and_clear_bit(RESYNC_AFTER_NEG, &mdev->flags)) {
		if (ns.conn == C_CONNECTED)
			resync_after_online_grow(mdev);
	}

	/* A resync finished or aborted, wake paused devices... */
	if ((os.conn > C_CONNECTED && ns.conn <= C_CONNECTED) ||
	    (os.peer_isp && !ns.peer_isp) ||
	    (os.user_isp && !ns.user_isp))
		resume_next_sg(mdev);

	/* sync target done with resync. Explicitly notify peer, even though
	 * it should (at least for non-empty resyncs) already know itself. */
	if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
		drbd_send_state(mdev);

	/* This triggers bitmap writeout of potentially still unwritten pages
	 * if the resync finished cleanly, or aborted because of peer disk
	 * failure, or because of connection loss.
	 * For resync aborted because of local disk failure, we cannot do
	 * any bitmap writeout anymore.
	 * No harm done if some bits change during this phase.
	 */
	if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) {
		drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL,
			"write from resync_finished", BM_LOCKED_SET_ALLOWED);
		put_ldev(mdev);
	}

	if (ns.disk == D_DISKLESS &&
	    ns.conn == C_STANDALONE &&
	    ns.role == R_SECONDARY) {
		if (os.aftr_isp != ns.aftr_isp)
			resume_next_sg(mdev);
	}

	after_all_state_ch(mdev->tconn);

	drbd_md_sync(mdev);
}
1357
bbeb641c
PR
/* Deferred work item for actions after a connection-wide state change,
 * queued by _conn_request_state() and run by w_after_conn_state_ch(). */
struct after_conn_state_chg_work {
	struct drbd_work w;
	enum drbd_conns oc;		/* old (common) connection state */
	union drbd_state ns_max;	/* new, max state, over all mdevs */
	enum chg_state_flags flags;
};
1364
0e29d163 1365static void after_all_state_ch(struct drbd_tconn *tconn)
b8907339 1366{
0e29d163
PR
1367 if (conn_all_vols_unconf(tconn) &&
1368 test_bit(OBJECT_DYING, &tconn->flags)) {
bbeb641c
PR
1369 drbd_thread_stop_nowait(&tconn->worker);
1370 }
1371}
1372
99920dc5 1373static int w_after_conn_state_ch(struct drbd_work *w, int unused)
bbeb641c
PR
1374{
1375 struct after_conn_state_chg_work *acscw =
1376 container_of(w, struct after_conn_state_chg_work, w);
1377 struct drbd_tconn *tconn = w->tconn;
1378 enum drbd_conns oc = acscw->oc;
5f082f98 1379 union drbd_state ns_max = acscw->ns_max;
bbeb641c
PR
1380
1381 kfree(acscw);
1382
b8907339 1383 /* Upon network configuration, we need to start the receiver */
5f082f98 1384 if (oc == C_STANDALONE && ns_max.conn == C_UNCONNECTED)
b8907339
PR
1385 drbd_thread_start(&tconn->receiver);
1386
bbeb641c 1387 //conn_err(tconn, STATE_FMT, STATE_ARGS("nms", nms));
0e29d163 1388 after_all_state_ch(tconn);
bbeb641c 1389
99920dc5 1390 return 0;
bbeb641c
PR
1391}
1392
/**
 * conn_old_common_state() - Compute the state common to all volumes
 * @tconn:	DRBD connection.
 * @pcs:	Out: the common old state.  Fields that differ between
 *		volumes keep the value of the first volume.
 * @pf:	In/out: chg_state_flags.  All CS_DC_* bits are first set, then
 *		cleared again for every state field that is NOT identical
 *		across all volumes of the connection.
 */
void conn_old_common_state(struct drbd_tconn *tconn, union drbd_state *pcs, enum chg_state_flags *pf)
{
	enum chg_state_flags flags = ~0;
	union drbd_dev_state os, cs = {}; /* old_state, common_state */
	struct drbd_conf *mdev;
	int vnr, first_vol = 1;

	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
		os = mdev->state;

		/* The first volume seeds the "common" state. */
		if (first_vol) {
			cs = os;
			first_vol = 0;
			continue;
		}

		if (cs.role != os.role)
			flags &= ~CS_DC_ROLE;

		if (cs.peer != os.peer)
			flags &= ~CS_DC_PEER;

		if (cs.conn != os.conn)
			flags &= ~CS_DC_CONN;

		if (cs.disk != os.disk)
			flags &= ~CS_DC_DISK;

		if (cs.pdsk != os.pdsk)
			flags &= ~CS_DC_PDSK;
	}

	*pf |= CS_DC_MASK;
	*pf &= flags;
	/* Copy via the raw union word: cs is a drbd_dev_state,
	 * *pcs is a drbd_state. */
	(*pcs).i = cs.i;
}
bbeb641c 1429
bd0c824a
PR
/**
 * conn_is_valid_transition() - Validate mask/val against every volume
 * @tconn:	DRBD connection.
 * @mask:	Bit mask selecting the state fields to change.
 * @val:	New values for the masked fields.
 * @flags:	chg_state_flags (CS_HARD skips the soft checks,
 *		CS_IGN_OUTD_FAIL ignores a disk -> D_OUTDATED demotion,
 *		CS_VERBOSE prints the reason on failure).
 *
 * Applies the prospective change to each volume's state in turn and
 * checks the resulting transition; stops at the first invalid one.
 *
 * Returns SS_SUCCESS or the first error encountered.
 */
static enum drbd_state_rv
conn_is_valid_transition(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val,
			 enum chg_state_flags flags)
{
	enum drbd_state_rv rv = SS_SUCCESS;
	union drbd_state ns, os;
	struct drbd_conf *mdev;
	int vnr;

	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
		os = drbd_read_state(mdev);
		ns = sanitize_state(mdev, apply_mask_val(os, mask, val), NULL);

		if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED)
			ns.disk = os.disk;

		/* No effective change on this volume: nothing to validate. */
		if (ns.i == os.i)
			continue;

		rv = is_valid_transition(os, ns);
		if (rv < SS_SUCCESS)
			break;

		if (!(flags & CS_HARD)) {
			rv = is_valid_state(mdev, ns);
			if (rv < SS_SUCCESS) {
				/* If the old state was already invalid for the
				 * same reason, judge the transition instead. */
				if (is_valid_state(mdev, os) == rv)
					rv = is_valid_soft_transition(os, ns);
			} else
				rv = is_valid_soft_transition(os, ns);
		}
		if (rv < SS_SUCCESS)
			break;
	}

	/* mdev/os/ns still hold the values of the volume that failed. */
	if (rv < SS_SUCCESS && flags & CS_VERBOSE)
		print_st_err(mdev, os, ns, rv);

	return rv;
}
1470
bd0c824a
PR
/**
 * conn_set_state() - Apply mask/val to every volume of a connection
 * @tconn:	DRBD connection.
 * @mask:	Bit mask selecting the state fields to change.
 * @val:	New values for the masked fields.
 * @flags:	chg_state_flags passed through to __drbd_set_state().
 *
 * Must only be called after conn_is_valid_transition() succeeded; a
 * failing __drbd_set_state() here is therefore a bug (BUG()).
 *
 * Returns the element-wise maximum state over all volumes.
 */
static union drbd_state
conn_set_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val,
	       enum chg_state_flags flags)
{
	union drbd_state ns, os, ms = { };
	struct drbd_conf *mdev;
	enum drbd_state_rv rv;
	int vnr;

	/* A full connection-state change updates the tconn's cstate. */
	if (mask.conn == C_MASK)
		tconn->cstate = val.conn;

	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
		os = drbd_read_state(mdev);
		ns = apply_mask_val(os, mask, val);
		ns = sanitize_state(mdev, ns, NULL);

		if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED)
			ns.disk = os.disk;

		rv = __drbd_set_state(mdev, ns, flags, NULL);
		if (rv < SS_SUCCESS)
			BUG();

		/* Accumulate the maximum state over all volumes. */
		ms.role = max_role(mdev->state.role, ms.role);
		ms.peer = max_role(mdev->state.peer, ms.peer);
		ms.disk = max_t(enum drbd_disk_state, mdev->state.disk, ms.disk);
		ms.pdsk = max_t(enum drbd_disk_state, mdev->state.pdsk, ms.pdsk);
	}

	return ms;
}
1503
df24aa45
PR
/**
 * _conn_rq_cond() - Wait condition for a cluster-wide state change
 * @tconn:	DRBD connection.
 * @mask:	Bit mask selecting the state fields to change.
 * @val:	New values for the masked fields.
 *
 * Used as the condition of wait_event() in conn_cl_wide().
 *
 * Returns a definite answer (SS_CW_SUCCESS, SS_CW_FAILED_BY_PEER,
 * SS_CW_NO_NEED, or a validation error) to end the wait, or
 * SS_UNKNOWN_ERROR to keep waiting.
 */
static enum drbd_state_rv
_conn_rq_cond(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val)
{
	enum drbd_state_rv rv;

	/* Peer's reply already arrived? */
	if (test_and_clear_bit(CONN_WD_ST_CHG_OKAY, &tconn->flags))
		return SS_CW_SUCCESS;

	if (test_and_clear_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags))
		return SS_CW_FAILED_BY_PEER;

	spin_lock_irq(&tconn->req_lock);
	/* Left C_WF_REPORT_PARAMS meanwhile: no cluster-wide change needed. */
	rv = tconn->cstate != C_WF_REPORT_PARAMS ? SS_CW_NO_NEED : SS_UNKNOWN_ERROR;

	if (rv == SS_UNKNOWN_ERROR)
		rv = conn_is_valid_transition(tconn, mask, val, 0);

	/* Still valid but no reply yet: keep waiting. */
	if (rv == SS_SUCCESS)
		rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */

	spin_unlock_irq(&tconn->req_lock);

	return rv;
}
1528
/**
 * conn_cl_wide() - Perform a cluster-wide state change
 * @tconn:	DRBD connection.
 * @mask:	Bit mask selecting the state fields to change.
 * @val:	New values for the masked fields.
 * @f:	chg_state_flags (currently only referenced in dead code below).
 *
 * Called with tconn->req_lock held; the lock is dropped while asking the
 * peer and waiting for its answer, and re-acquired before returning.
 *
 * Returns the outcome of the cluster-wide handshake.
 */
static enum drbd_state_rv
conn_cl_wide(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val,
	     enum chg_state_flags f)
{
	enum drbd_state_rv rv;

	spin_unlock_irq(&tconn->req_lock);
	mutex_lock(&tconn->cstate_mutex);

	if (conn_send_state_req(tconn, mask, val)) {
		rv = SS_CW_FAILED_BY_PEER;
		/* if (f & CS_VERBOSE)
		   print_st_err(mdev, os, ns, rv); */
		goto abort;
	}

	/* Wait until _conn_rq_cond() yields a definite answer. */
	wait_event(tconn->ping_wait, (rv = _conn_rq_cond(tconn, mask, val)));

abort:
	mutex_unlock(&tconn->cstate_mutex);
	spin_lock_irq(&tconn->req_lock);

	return rv;
}
1553
bbeb641c
PR
/**
 * _conn_request_state() - Request a connection-wide state change
 * @tconn:	DRBD connection.
 * @mask:	Bit mask selecting the state fields to change.
 * @val:	New values for the masked fields.
 * @flags:	chg_state_flags.
 *
 * Caller holds tconn->req_lock (it may be dropped and re-acquired inside
 * conn_cl_wide()).  Validates the transition, optionally negotiates it
 * cluster-wide, applies it to all volumes, and queues the deferred
 * after-state-change work.
 *
 * Returns SS_SUCCESS or the reason for failure.
 */
enum drbd_state_rv
_conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val,
		    enum chg_state_flags flags)
{
	enum drbd_state_rv rv = SS_SUCCESS;
	struct after_conn_state_chg_work *acscw;
	enum drbd_conns oc = tconn->cstate;
	union drbd_state ns_max, os;

	rv = is_valid_conn_transition(oc, val.conn);
	if (rv < SS_SUCCESS)
		goto abort;

	rv = conn_is_valid_transition(tconn, mask, val, flags);
	if (rv < SS_SUCCESS)
		goto abort;

	/* Disconnecting out of WFReportParams needs the peer's consent,
	 * unless the change is local-only or forced. */
	if (oc == C_WF_REPORT_PARAMS && val.conn == C_DISCONNECTING &&
	    !(flags & (CS_LOCAL_ONLY | CS_HARD))) {
		rv = conn_cl_wide(tconn, mask, val, flags);
		if (rv < SS_SUCCESS)
			goto abort;
	}

	conn_old_common_state(tconn, &os, &flags);
	ns_max = conn_set_state(tconn, mask, val, flags);
	ns_max.conn = val.conn;
	conn_pr_state_change(tconn, os, ns_max, flags);

	/* GFP_ATOMIC: we are under req_lock (spinlock), must not sleep. */
	acscw = kmalloc(sizeof(*acscw), GFP_ATOMIC);
	if (acscw) {
		acscw->oc = os.conn;
		acscw->ns_max = ns_max;
		acscw->flags = flags;
		acscw->w.cb = w_after_conn_state_ch;
		acscw->w.tconn = tconn;
		drbd_queue_work(&tconn->data.work, &acscw->w);
	} else {
		conn_err(tconn, "Could not kmalloc an acscw\n");
	}

abort:
	return rv;
}
1598
1599enum drbd_state_rv
1600conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val,
1601 enum chg_state_flags flags)
1602{
1603 enum drbd_state_rv rv;
1604
1605 spin_lock_irq(&tconn->req_lock);
1606 rv = _conn_request_state(tconn, mask, val, flags);
1607 spin_unlock_irq(&tconn->req_lock);
1608
1609 return rv;
b8907339 1610}