/*
   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   Thanks to Carter Burden, Bart Grantham and Gennadiy Nerubayev
   from Logicworks, Inc. for making SDP replication support possible.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <linux/drbd_limits.h>

extern void tl_abort_disk_io(struct drbd_conf *mdev);

struct after_state_chg_work {
	struct drbd_work w;
	union drbd_state os;
	union drbd_state ns;
	enum chg_state_flags flags;
	struct completion *done;
};

enum sanitize_state_warnings {
	NO_WARNING,
	ABORTED_ONLINE_VERIFY,
	ABORTED_RESYNC,
	CONNECTION_LOST_NEGOTIATING,
	IMPLICITLY_UPGRADED_DISK,
	IMPLICITLY_UPGRADED_PDSK,
};

static int w_after_state_ch(struct drbd_work *w, int unused);
static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
			   union drbd_state ns, enum chg_state_flags flags);
static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state);
static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state);
static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns);
static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns,
				       enum sanitize_state_warnings *warn);

static inline bool is_susp(union drbd_state s)
{
	return s.susp || s.susp_nod || s.susp_fen;
}

bool conn_all_vols_unconf(struct drbd_tconn *tconn)
{
	struct drbd_conf *mdev;
	bool rv = true;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
		if (mdev->state.disk != D_DISKLESS ||
		    mdev->state.conn != C_STANDALONE ||
		    mdev->state.role != R_SECONDARY) {
			rv = false;
			break;
		}
	}
	rcu_read_unlock();

	return rv;
}

/* Unfortunately the states were not correctly ordered when they were
   defined, therefore we cannot use max_t() here. */
static enum drbd_role max_role(enum drbd_role role1, enum drbd_role role2)
{
	if (role1 == R_PRIMARY || role2 == R_PRIMARY)
		return R_PRIMARY;
	if (role1 == R_SECONDARY || role2 == R_SECONDARY)
		return R_SECONDARY;
	return R_UNKNOWN;
}

static enum drbd_role min_role(enum drbd_role role1, enum drbd_role role2)
{
	if (role1 == R_UNKNOWN || role2 == R_UNKNOWN)
		return R_UNKNOWN;
	if (role1 == R_SECONDARY || role2 == R_SECONDARY)
		return R_SECONDARY;
	return R_PRIMARY;
}

enum drbd_role conn_highest_role(struct drbd_tconn *tconn)
{
	enum drbd_role role = R_UNKNOWN;
	struct drbd_conf *mdev;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&tconn->volumes, mdev, vnr)
		role = max_role(role, mdev->state.role);
	rcu_read_unlock();

	return role;
}

enum drbd_role conn_highest_peer(struct drbd_tconn *tconn)
{
	enum drbd_role peer = R_UNKNOWN;
	struct drbd_conf *mdev;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&tconn->volumes, mdev, vnr)
		peer = max_role(peer, mdev->state.peer);
	rcu_read_unlock();

	return peer;
}

enum drbd_disk_state conn_highest_disk(struct drbd_tconn *tconn)
{
	enum drbd_disk_state ds = D_DISKLESS;
	struct drbd_conf *mdev;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&tconn->volumes, mdev, vnr)
		ds = max_t(enum drbd_disk_state, ds, mdev->state.disk);
	rcu_read_unlock();

	return ds;
}

enum drbd_disk_state conn_lowest_disk(struct drbd_tconn *tconn)
{
	enum drbd_disk_state ds = D_MASK;
	struct drbd_conf *mdev;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&tconn->volumes, mdev, vnr)
		ds = min_t(enum drbd_disk_state, ds, mdev->state.disk);
	rcu_read_unlock();

	return ds;
}

enum drbd_disk_state conn_highest_pdsk(struct drbd_tconn *tconn)
{
	enum drbd_disk_state ds = D_DISKLESS;
	struct drbd_conf *mdev;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&tconn->volumes, mdev, vnr)
		ds = max_t(enum drbd_disk_state, ds, mdev->state.pdsk);
	rcu_read_unlock();

	return ds;
}

enum drbd_conns conn_lowest_conn(struct drbd_tconn *tconn)
{
	enum drbd_conns conn = C_MASK;
	struct drbd_conf *mdev;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&tconn->volumes, mdev, vnr)
		conn = min_t(enum drbd_conns, conn, mdev->state.conn);
	rcu_read_unlock();

	return conn;
}

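/*
 * Illustrative sketch (not part of the original source): the helpers above
 * reduce the per-volume device states of one connection to a single summary
 * value.  A hypothetical caller could use them like this to check whether
 * any volume still holds an attached disk or the primary role:
 *
 *	if (conn_highest_disk(tconn) == D_DISKLESS &&
 *	    conn_highest_role(tconn) == R_SECONDARY)
 *		conn_info(tconn, "all volumes diskless and secondary\n");
 */
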
/**
 * cl_wide_st_chg() - true if the state change is a cluster wide one
 * @mdev:	DRBD device.
 * @os:		old (current) state.
 * @ns:		new (wanted) state.
 */
static int cl_wide_st_chg(struct drbd_conf *mdev,
			  union drbd_state os, union drbd_state ns)
{
	return (os.conn >= C_CONNECTED && ns.conn >= C_CONNECTED &&
		 ((os.role != R_PRIMARY && ns.role == R_PRIMARY) ||
		  (os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
		  (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S) ||
		  (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))) ||
		(os.conn >= C_CONNECTED && ns.conn == C_DISCONNECTING) ||
		(os.conn == C_CONNECTED && ns.conn == C_VERIFY_S) ||
		(os.conn == C_CONNECTED && ns.conn == C_WF_REPORT_PARAMS);
}

static union drbd_state
apply_mask_val(union drbd_state os, union drbd_state mask, union drbd_state val)
{
	union drbd_state ns;
	ns.i = (os.i & ~mask.i) | val.i;
	return ns;
}

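/*
 * Illustrative note (not part of the original source): @mask selects which
 * fields of the state union get overwritten and @val carries their new
 * values.  A request such as NS(conn, C_DISCONNECTING) therefore only
 * replaces the .conn field; every other field keeps its old value:
 *
 *	ns = apply_mask_val(os, mask, val);  /+ ns differs from os in .conn only +/
 */
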
enum drbd_state_rv
drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f,
		  union drbd_state mask, union drbd_state val)
{
	unsigned long flags;
	union drbd_state ns;
	enum drbd_state_rv rv;

	spin_lock_irqsave(&mdev->tconn->req_lock, flags);
	ns = apply_mask_val(drbd_read_state(mdev), mask, val);
	rv = _drbd_set_state(mdev, ns, f, NULL);
	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);

	return rv;
}

/**
 * drbd_force_state() - Impose a change which happens outside our control on our state
 * @mdev:	DRBD device.
 * @mask:	mask of state bits to change.
 * @val:	value of new state bits.
 */
void drbd_force_state(struct drbd_conf *mdev,
	union drbd_state mask, union drbd_state val)
{
	drbd_change_state(mdev, CS_HARD, mask, val);
}

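/*
 * Usage sketch (not part of the original source; the concrete call site is
 * illustrative): a forced change bypasses the "soft" transition checks via
 * CS_HARD, e.g. marking the local disk failed after an I/O error:
 *
 *	drbd_force_state(mdev, NS(disk, D_FAILED));
 */
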
static enum drbd_state_rv
_req_st_cond(struct drbd_conf *mdev, union drbd_state mask,
	     union drbd_state val)
{
	union drbd_state os, ns;
	unsigned long flags;
	enum drbd_state_rv rv;

	if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &mdev->flags))
		return SS_CW_SUCCESS;

	if (test_and_clear_bit(CL_ST_CHG_FAIL, &mdev->flags))
		return SS_CW_FAILED_BY_PEER;

	spin_lock_irqsave(&mdev->tconn->req_lock, flags);
	os = drbd_read_state(mdev);
	ns = sanitize_state(mdev, apply_mask_val(os, mask, val), NULL);
	rv = is_valid_transition(os, ns);
	if (rv == SS_SUCCESS)
		rv = SS_UNKNOWN_ERROR;  /* cont waiting, otherwise fail. */

	if (!cl_wide_st_chg(mdev, os, ns))
		rv = SS_CW_NO_NEED;
	if (rv == SS_UNKNOWN_ERROR) {
		rv = is_valid_state(mdev, ns);
		if (rv == SS_SUCCESS) {
			rv = is_valid_soft_transition(os, ns);
			if (rv == SS_SUCCESS)
				rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */
		}
	}
	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);

	return rv;
}

/**
 * drbd_req_state() - Perform an eventually cluster wide state change
 * @mdev:	DRBD device.
 * @mask:	mask of state bits to change.
 * @val:	value of new state bits.
 *
 * Should not be called directly, use drbd_request_state() or
 * _drbd_request_state().
 */
static enum drbd_state_rv
drbd_req_state(struct drbd_conf *mdev, union drbd_state mask,
	       union drbd_state val, enum chg_state_flags f)
{
	struct completion done;
	unsigned long flags;
	union drbd_state os, ns;
	enum drbd_state_rv rv;

	init_completion(&done);

	if (f & CS_SERIALIZE)
		mutex_lock(mdev->state_mutex);

	spin_lock_irqsave(&mdev->tconn->req_lock, flags);
	os = drbd_read_state(mdev);
	ns = sanitize_state(mdev, apply_mask_val(os, mask, val), NULL);
	rv = is_valid_transition(os, ns);
	if (rv < SS_SUCCESS) {
		spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
		goto abort;
	}

	if (cl_wide_st_chg(mdev, os, ns)) {
		rv = is_valid_state(mdev, ns);
		if (rv == SS_SUCCESS)
			rv = is_valid_soft_transition(os, ns);
		spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);

		if (rv < SS_SUCCESS) {
			if (f & CS_VERBOSE)
				print_st_err(mdev, os, ns, rv);
			goto abort;
		}

		if (drbd_send_state_req(mdev, mask, val)) {
			rv = SS_CW_FAILED_BY_PEER;
			if (f & CS_VERBOSE)
				print_st_err(mdev, os, ns, rv);
			goto abort;
		}

		wait_event(mdev->state_wait,
			(rv = _req_st_cond(mdev, mask, val)));

		if (rv < SS_SUCCESS) {
			if (f & CS_VERBOSE)
				print_st_err(mdev, os, ns, rv);
			goto abort;
		}
		spin_lock_irqsave(&mdev->tconn->req_lock, flags);
		ns = apply_mask_val(drbd_read_state(mdev), mask, val);
		rv = _drbd_set_state(mdev, ns, f, &done);
	} else {
		rv = _drbd_set_state(mdev, ns, f, &done);
	}

	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);

	if (f & CS_WAIT_COMPLETE && rv == SS_SUCCESS) {
		D_ASSERT(current != mdev->tconn->worker.task);
		wait_for_completion(&done);
	}

abort:
	if (f & CS_SERIALIZE)
		mutex_unlock(mdev->state_mutex);

	return rv;
}

/**
 * _drbd_request_state() - Request a state change (with flags)
 * @mdev:	DRBD device.
 * @mask:	mask of state bits to change.
 * @val:	value of new state bits.
 *
 * Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE
 * flag, or when logging of failed state change requests is not desired.
 */
enum drbd_state_rv
_drbd_request_state(struct drbd_conf *mdev, union drbd_state mask,
		    union drbd_state val, enum chg_state_flags f)
{
	enum drbd_state_rv rv;

	wait_event(mdev->state_wait,
		   (rv = drbd_req_state(mdev, mask, val, f)) != SS_IN_TRANSIENT_STATE);

	return rv;
}

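/*
 * Usage sketch (not part of the original source; the caller and the flag
 * combination are illustrative): request a graceful disconnect.
 * _drbd_request_state() itself keeps retrying while drbd_req_state()
 * answers SS_IN_TRANSIENT_STATE:
 *
 *	enum drbd_state_rv rv;
 *
 *	rv = _drbd_request_state(mdev, NS(conn, C_DISCONNECTING),
 *				 CS_VERBOSE | CS_WAIT_COMPLETE);
 *	if (rv < SS_SUCCESS)
 *		dev_err(DEV, "disconnect failed: %s\n", drbd_set_st_err_str(rv));
 */
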
static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns)
{
	dev_err(DEV, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c%c%c }\n",
	    name,
	    drbd_conn_str(ns.conn),
	    drbd_role_str(ns.role),
	    drbd_role_str(ns.peer),
	    drbd_disk_str(ns.disk),
	    drbd_disk_str(ns.pdsk),
	    is_susp(ns) ? 's' : 'r',
	    ns.aftr_isp ? 'a' : '-',
	    ns.peer_isp ? 'p' : '-',
	    ns.user_isp ? 'u' : '-',
	    ns.susp_fen ? 'F' : '-',
	    ns.susp_nod ? 'N' : '-'
	    );
}

void print_st_err(struct drbd_conf *mdev, union drbd_state os,
	          union drbd_state ns, enum drbd_state_rv err)
{
	if (err == SS_IN_TRANSIENT_STATE)
		return;
	dev_err(DEV, "State change failed: %s\n", drbd_set_st_err_str(err));
	print_st(mdev, " state", os);
	print_st(mdev, "wanted", ns);
}

static long print_state_change(char *pb, union drbd_state os, union drbd_state ns,
			       enum chg_state_flags flags)
{
	char *pbp;
	pbp = pb;
	*pbp = 0;

	if (ns.role != os.role && flags & CS_DC_ROLE)
		pbp += sprintf(pbp, "role( %s -> %s ) ",
			       drbd_role_str(os.role),
			       drbd_role_str(ns.role));
	if (ns.peer != os.peer && flags & CS_DC_PEER)
		pbp += sprintf(pbp, "peer( %s -> %s ) ",
			       drbd_role_str(os.peer),
			       drbd_role_str(ns.peer));
	if (ns.conn != os.conn && flags & CS_DC_CONN)
		pbp += sprintf(pbp, "conn( %s -> %s ) ",
			       drbd_conn_str(os.conn),
			       drbd_conn_str(ns.conn));
	if (ns.disk != os.disk && flags & CS_DC_DISK)
		pbp += sprintf(pbp, "disk( %s -> %s ) ",
			       drbd_disk_str(os.disk),
			       drbd_disk_str(ns.disk));
	if (ns.pdsk != os.pdsk && flags & CS_DC_PDSK)
		pbp += sprintf(pbp, "pdsk( %s -> %s ) ",
			       drbd_disk_str(os.pdsk),
			       drbd_disk_str(ns.pdsk));

	return pbp - pb;
}

static void drbd_pr_state_change(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns,
				 enum chg_state_flags flags)
{
	char pb[300];
	char *pbp = pb;

	pbp += print_state_change(pbp, os, ns, flags ^ CS_DC_MASK);

	if (ns.aftr_isp != os.aftr_isp)
		pbp += sprintf(pbp, "aftr_isp( %d -> %d ) ",
			       os.aftr_isp,
			       ns.aftr_isp);
	if (ns.peer_isp != os.peer_isp)
		pbp += sprintf(pbp, "peer_isp( %d -> %d ) ",
			       os.peer_isp,
			       ns.peer_isp);
	if (ns.user_isp != os.user_isp)
		pbp += sprintf(pbp, "user_isp( %d -> %d ) ",
			       os.user_isp,
			       ns.user_isp);

	if (pbp != pb)
		dev_info(DEV, "%s\n", pb);
}

static void conn_pr_state_change(struct drbd_tconn *tconn, union drbd_state os, union drbd_state ns,
				 enum chg_state_flags flags)
{
	char pb[300];
	char *pbp = pb;

	pbp += print_state_change(pbp, os, ns, flags);

	if (is_susp(ns) != is_susp(os) && flags & CS_DC_SUSP)
		pbp += sprintf(pbp, "susp( %d -> %d ) ",
			       is_susp(os),
			       is_susp(ns));

	if (pbp != pb)
		conn_info(tconn, "%s\n", pb);
}

/**
 * is_valid_state() - Returns an SS_ error code if ns is not valid
 * @mdev:	DRBD device.
 * @ns:		State to consider.
 */
static enum drbd_state_rv
is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
{
	/* See drbd_state_sw_errors in drbd_strings.c */

	enum drbd_fencing_p fp;
	enum drbd_state_rv rv = SS_SUCCESS;
	struct net_conf *nc;

	rcu_read_lock();
	fp = FP_DONT_CARE;
	if (get_ldev(mdev)) {
		fp = rcu_dereference(mdev->ldev->disk_conf)->fencing;
		put_ldev(mdev);
	}

	nc = rcu_dereference(mdev->tconn->net_conf);
	if (nc) {
		if (!nc->two_primaries && ns.role == R_PRIMARY) {
			if (ns.peer == R_PRIMARY)
				rv = SS_TWO_PRIMARIES;
			else if (conn_highest_peer(mdev->tconn) == R_PRIMARY)
				rv = SS_O_VOL_PEER_PRI;
		}
	}

	if (rv <= 0)
		/* already found a reason to abort */;
	else if (ns.role == R_SECONDARY && mdev->open_cnt)
		rv = SS_DEVICE_IN_USE;

	else if (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.disk < D_UP_TO_DATE)
		rv = SS_NO_UP_TO_DATE_DISK;

	else if (fp >= FP_RESOURCE &&
		 ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk >= D_UNKNOWN)
		rv = SS_PRIMARY_NOP;

	else if (ns.role == R_PRIMARY && ns.disk <= D_INCONSISTENT && ns.pdsk <= D_INCONSISTENT)
		rv = SS_NO_UP_TO_DATE_DISK;

	else if (ns.conn > C_CONNECTED && ns.disk < D_INCONSISTENT)
		rv = SS_NO_LOCAL_DISK;

	else if (ns.conn > C_CONNECTED && ns.pdsk < D_INCONSISTENT)
		rv = SS_NO_REMOTE_DISK;

	else if (ns.conn > C_CONNECTED && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)
		rv = SS_NO_UP_TO_DATE_DISK;

	else if ((ns.conn == C_CONNECTED ||
		  ns.conn == C_WF_BITMAP_S ||
		  ns.conn == C_SYNC_SOURCE ||
		  ns.conn == C_PAUSED_SYNC_S) &&
		  ns.disk == D_OUTDATED)
		rv = SS_CONNECTED_OUTDATES;

	else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
		 (nc->verify_alg[0] == 0))
		rv = SS_NO_VERIFY_ALG;

	else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
		  mdev->tconn->agreed_pro_version < 88)
		rv = SS_NOT_SUPPORTED;

	else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN)
		rv = SS_CONNECTED_OUTDATES;

	rcu_read_unlock();

	return rv;
}

/**
 * is_valid_soft_transition() - Returns an SS_ error code if the state transition is not possible
 * This function limits state transitions that may be declined by DRBD, i.e.
 * user requests (aka soft transitions).
 * @os:		old state.
 * @ns:		new state.
 */
static enum drbd_state_rv
is_valid_soft_transition(union drbd_state os, union drbd_state ns)
{
	enum drbd_state_rv rv = SS_SUCCESS;

	if ((ns.conn == C_STARTING_SYNC_T || ns.conn == C_STARTING_SYNC_S) &&
	    os.conn > C_CONNECTED)
		rv = SS_RESYNC_RUNNING;

	if (ns.conn == C_DISCONNECTING && os.conn == C_STANDALONE)
		rv = SS_ALREADY_STANDALONE;

	if (ns.disk > D_ATTACHING && os.disk == D_DISKLESS)
		rv = SS_IS_DISKLESS;

	if (ns.conn == C_WF_CONNECTION && os.conn < C_UNCONNECTED)
		rv = SS_NO_NET_CONFIG;

	if (ns.disk == D_OUTDATED && os.disk < D_OUTDATED && os.disk != D_ATTACHING)
		rv = SS_LOWER_THAN_OUTDATED;

	if (ns.conn == C_DISCONNECTING && os.conn == C_UNCONNECTED)
		rv = SS_IN_TRANSIENT_STATE;

	/* if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS)
	   rv = SS_IN_TRANSIENT_STATE; */

	if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED)
		rv = SS_NEED_CONNECTION;

	if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
	    ns.conn != os.conn && os.conn > C_CONNECTED)
		rv = SS_RESYNC_RUNNING;

	if ((ns.conn == C_STARTING_SYNC_S || ns.conn == C_STARTING_SYNC_T) &&
	    os.conn < C_CONNECTED)
		rv = SS_NEED_CONNECTION;

	if ((ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)
	    && os.conn < C_WF_REPORT_PARAMS)
		rv = SS_NEED_CONNECTION; /* No NetworkFailure -> SyncTarget etc... */

	return rv;
}

static enum drbd_state_rv
is_valid_conn_transition(enum drbd_conns oc, enum drbd_conns nc)
{
	/* no change -> nothing to do, at least for the connection part */
	if (oc == nc)
		return SS_NOTHING_TO_DO;

	/* disconnect of an unconfigured connection does not make sense */
	if (oc == C_STANDALONE && nc == C_DISCONNECTING)
		return SS_ALREADY_STANDALONE;

	/* from C_STANDALONE, we start with C_UNCONNECTED */
	if (oc == C_STANDALONE && nc != C_UNCONNECTED)
		return SS_NEED_CONNECTION;

	/* After a network error only C_UNCONNECTED or C_DISCONNECTING may follow. */
	if (oc >= C_TIMEOUT && oc <= C_TEAR_DOWN && nc != C_UNCONNECTED && nc != C_DISCONNECTING)
		return SS_IN_TRANSIENT_STATE;

	/* After C_DISCONNECTING only C_STANDALONE may follow */
	if (oc == C_DISCONNECTING && nc != C_STANDALONE)
		return SS_IN_TRANSIENT_STATE;

	return SS_SUCCESS;
}

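/*
 * Illustrative summary (not part of the original source): the checks above
 * enforce the usual connection life cycle,
 *
 *	C_STANDALONE -> C_UNCONNECTED -> C_WF_CONNECTION -> ...
 *	    ... -> C_DISCONNECTING -> C_STANDALONE
 *
 * Skipping C_UNCONNECTED when leaving C_STANDALONE, or leaving
 * C_DISCONNECTING for anything other than C_STANDALONE, is rejected.
 */
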
/**
 * is_valid_transition() - Returns an SS_ error code if the state transition is not possible
 * This limits hard state transitions. Hard state transitions are facts that are
 * imposed on DRBD by the environment, e.g. the disk broke or the network broke down.
 * But even those hard state transitions are still not allowed to do everything.
 * @os:		old state.
 * @ns:		new state.
 */
static enum drbd_state_rv
is_valid_transition(union drbd_state os, union drbd_state ns)
{
	enum drbd_state_rv rv;

	rv = is_valid_conn_transition(os.conn, ns.conn);

	/* we cannot fail (again) if we already detached */
	if (ns.disk == D_FAILED && os.disk == D_DISKLESS)
		rv = SS_IS_DISKLESS;

	return rv;
}

static void print_sanitize_warnings(struct drbd_conf *mdev, enum sanitize_state_warnings warn)
{
	static const char *msg_table[] = {
		[NO_WARNING] = "",
		[ABORTED_ONLINE_VERIFY] = "Online-verify aborted.",
		[ABORTED_RESYNC] = "Resync aborted.",
		[CONNECTION_LOST_NEGOTIATING] = "Connection lost while negotiating, no data!",
		[IMPLICITLY_UPGRADED_DISK] = "Implicitly upgraded disk",
		[IMPLICITLY_UPGRADED_PDSK] = "Implicitly upgraded pdsk",
	};

	if (warn != NO_WARNING)
		dev_warn(DEV, "%s\n", msg_table[warn]);
}

/**
 * sanitize_state() - Resolves implicitly necessary additional changes to a state transition
 * @mdev:	DRBD device.
 *
 * When we lose connection, we have to set the state of the peer's disk (pdsk)
 * to D_UNKNOWN. This rule and many more along those lines are in this function.
 */
static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns,
				       enum sanitize_state_warnings *warn)
{
	enum drbd_fencing_p fp;
	enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max;

	if (warn)
		*warn = NO_WARNING;

	fp = FP_DONT_CARE;
	if (get_ldev(mdev)) {
		rcu_read_lock();
		fp = rcu_dereference(mdev->ldev->disk_conf)->fencing;
		rcu_read_unlock();
		put_ldev(mdev);
	}

	/* Implications from connection to peer and peer_isp */
	if (ns.conn < C_CONNECTED) {
		ns.peer_isp = 0;
		ns.peer = R_UNKNOWN;
		if (ns.pdsk > D_UNKNOWN || ns.pdsk < D_INCONSISTENT)
			ns.pdsk = D_UNKNOWN;
	}

	/* Clear the aftr_isp when becoming unconfigured */
	if (ns.conn == C_STANDALONE && ns.disk == D_DISKLESS && ns.role == R_SECONDARY)
		ns.aftr_isp = 0;

	/* An implication of the disk states onto the connection state */
	/* Abort resync if a disk fails/detaches */
	if (ns.conn > C_CONNECTED && (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) {
		if (warn)
			*warn = ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T ?
				ABORTED_ONLINE_VERIFY : ABORTED_RESYNC;
		ns.conn = C_CONNECTED;
	}

	/* Connection breaks down before we finished "Negotiating" */
	if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING &&
	    get_ldev_if_state(mdev, D_NEGOTIATING)) {
		if (mdev->ed_uuid == mdev->ldev->md.uuid[UI_CURRENT]) {
			ns.disk = mdev->new_state_tmp.disk;
			ns.pdsk = mdev->new_state_tmp.pdsk;
		} else {
			if (warn)
				*warn = CONNECTION_LOST_NEGOTIATING;
			ns.disk = D_DISKLESS;
			ns.pdsk = D_UNKNOWN;
		}
		put_ldev(mdev);
	}

	/* D_CONSISTENT and D_OUTDATED vanish when we get connected */
	if (ns.conn >= C_CONNECTED && ns.conn < C_AHEAD) {
		if (ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED)
			ns.disk = D_UP_TO_DATE;
		if (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED)
			ns.pdsk = D_UP_TO_DATE;
	}

	/* Implications of the connection state on the disk states */
	disk_min = D_DISKLESS;
	disk_max = D_UP_TO_DATE;
	pdsk_min = D_INCONSISTENT;
	pdsk_max = D_UNKNOWN;
	switch ((enum drbd_conns)ns.conn) {
	case C_WF_BITMAP_T:
	case C_PAUSED_SYNC_T:
	case C_STARTING_SYNC_T:
	case C_WF_SYNC_UUID:
	case C_BEHIND:
		disk_min = D_INCONSISTENT;
		disk_max = D_OUTDATED;
		pdsk_min = D_UP_TO_DATE;
		pdsk_max = D_UP_TO_DATE;
		break;
	case C_VERIFY_S:
	case C_VERIFY_T:
		disk_min = D_UP_TO_DATE;
		disk_max = D_UP_TO_DATE;
		pdsk_min = D_UP_TO_DATE;
		pdsk_max = D_UP_TO_DATE;
		break;
	case C_CONNECTED:
		disk_min = D_DISKLESS;
		disk_max = D_UP_TO_DATE;
		pdsk_min = D_DISKLESS;
		pdsk_max = D_UP_TO_DATE;
		break;
	case C_WF_BITMAP_S:
	case C_PAUSED_SYNC_S:
	case C_STARTING_SYNC_S:
	case C_AHEAD:
		disk_min = D_UP_TO_DATE;
		disk_max = D_UP_TO_DATE;
		pdsk_min = D_INCONSISTENT;
		pdsk_max = D_CONSISTENT; /* D_OUTDATED would be nice. But explicit outdate necessary*/
		break;
	case C_SYNC_TARGET:
		disk_min = D_INCONSISTENT;
		disk_max = D_INCONSISTENT;
		pdsk_min = D_UP_TO_DATE;
		pdsk_max = D_UP_TO_DATE;
		break;
	case C_SYNC_SOURCE:
		disk_min = D_UP_TO_DATE;
		disk_max = D_UP_TO_DATE;
		pdsk_min = D_INCONSISTENT;
		pdsk_max = D_INCONSISTENT;
		break;
	case C_STANDALONE:
	case C_DISCONNECTING:
	case C_UNCONNECTED:
	case C_TIMEOUT:
	case C_BROKEN_PIPE:
	case C_NETWORK_FAILURE:
	case C_PROTOCOL_ERROR:
	case C_TEAR_DOWN:
	case C_WF_CONNECTION:
	case C_WF_REPORT_PARAMS:
	case C_MASK:
		break;
	}

	if (ns.disk > disk_max)
		ns.disk = disk_max;

	if (ns.disk < disk_min) {
		if (warn)
			*warn = IMPLICITLY_UPGRADED_DISK;
		ns.disk = disk_min;
	}

	if (ns.pdsk > pdsk_max)
		ns.pdsk = pdsk_max;

	if (ns.pdsk < pdsk_min) {
		if (warn)
			*warn = IMPLICITLY_UPGRADED_PDSK;
		ns.pdsk = pdsk_min;
	}

	if (fp == FP_STONITH &&
	    (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED))
		ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */

	if (mdev->tconn->res_opts.on_no_data == OND_SUSPEND_IO &&
	    (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE))
		ns.susp_nod = 1; /* Suspend IO while no data available (no accessible data available) */

	if (ns.aftr_isp || ns.peer_isp || ns.user_isp) {
		if (ns.conn == C_SYNC_SOURCE)
			ns.conn = C_PAUSED_SYNC_S;
		if (ns.conn == C_SYNC_TARGET)
			ns.conn = C_PAUSED_SYNC_T;
	} else {
		if (ns.conn == C_PAUSED_SYNC_S)
			ns.conn = C_SYNC_SOURCE;
		if (ns.conn == C_PAUSED_SYNC_T)
			ns.conn = C_SYNC_TARGET;
	}

	return ns;
}

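/*
 * Illustrative example (not part of the original source): if a caller asks
 * for { conn = C_TIMEOUT } while the peer disk was D_UP_TO_DATE,
 * sanitize_state() additionally clears peer/peer_isp and forces pdsk to
 * D_UNKNOWN, because below C_CONNECTED we no longer know anything about
 * the peer; the caller only requested the connection change, the rest is
 * implied.
 */
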
void drbd_resume_al(struct drbd_conf *mdev)
{
	if (test_and_clear_bit(AL_SUSPENDED, &mdev->flags))
		dev_info(DEV, "Resumed AL updates\n");
}

/* helper for __drbd_set_state */
static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs)
{
	if (mdev->tconn->agreed_pro_version < 90)
		mdev->ov_start_sector = 0;
	mdev->rs_total = drbd_bm_bits(mdev);
	mdev->ov_position = 0;
	if (cs == C_VERIFY_T) {
		/* starting online verify from an arbitrary position
		 * does not fit well into the existing protocol.
		 * on C_VERIFY_T, we initialize ov_left and friends
		 * implicitly in receive_DataRequest once the
		 * first P_OV_REQUEST is received */
		mdev->ov_start_sector = ~(sector_t)0;
	} else {
		unsigned long bit = BM_SECT_TO_BIT(mdev->ov_start_sector);
		if (bit >= mdev->rs_total) {
			mdev->ov_start_sector =
				BM_BIT_TO_SECT(mdev->rs_total - 1);
			mdev->rs_total = 1;
		} else
			mdev->rs_total -= bit;
		mdev->ov_position = mdev->ov_start_sector;
	}
	mdev->ov_left = mdev->rs_total;
}

/**
 * __drbd_set_state() - Set a new DRBD state
 * @mdev:	DRBD device.
 * @done:	Optional completion, that will get completed after the after_state_ch() finished
 *
 * Caller needs to hold req_lock, and global_state_lock. Do not call directly.
 */
enum drbd_state_rv
__drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
	         enum chg_state_flags flags, struct completion *done)
{
	union drbd_state os;
	enum drbd_state_rv rv = SS_SUCCESS;
	enum sanitize_state_warnings ssw;
	struct after_state_chg_work *ascw;

	os = drbd_read_state(mdev);

	ns = sanitize_state(mdev, ns, &ssw);
	if (ns.i == os.i)
		return SS_NOTHING_TO_DO;

	rv = is_valid_transition(os, ns);
	if (rv < SS_SUCCESS)
		return rv;

	if (!(flags & CS_HARD)) {
		/*  pre-state-change checks ; only look at ns  */
		/* See drbd_state_sw_errors in drbd_strings.c */

		rv = is_valid_state(mdev, ns);
		if (rv < SS_SUCCESS) {
			/* If the old state was illegal as well, then let
			   this happen...*/

			if (is_valid_state(mdev, os) == rv)
				rv = is_valid_soft_transition(os, ns);
		} else
			rv = is_valid_soft_transition(os, ns);
	}

	if (rv < SS_SUCCESS) {
		if (flags & CS_VERBOSE)
			print_st_err(mdev, os, ns, rv);
		return rv;
	}

	print_sanitize_warnings(mdev, ssw);

	drbd_pr_state_change(mdev, os, ns, flags);

	/* Display changes to the susp* flags that where caused by the call to
	   sanitize_state(). Only display it here if we where not called from
	   _conn_request_state() */
	if (!(flags & CS_DC_SUSP))
		conn_pr_state_change(mdev->tconn, os, ns, (flags & ~CS_DC_MASK) | CS_DC_SUSP);

	/* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference
	 * on the ldev here, to be sure the transition -> D_DISKLESS resp.
	 * drbd_ldev_destroy() won't happen before our corresponding
	 * after_state_ch works run, where we put_ldev again. */
	if ((os.disk != D_FAILED && ns.disk == D_FAILED) ||
	    (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))
		atomic_inc(&mdev->local_cnt);

	mdev->state.i = ns.i;
	mdev->tconn->susp = ns.susp;
	mdev->tconn->susp_nod = ns.susp_nod;
	mdev->tconn->susp_fen = ns.susp_fen;

	if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING)
		drbd_print_uuids(mdev, "attached to UUIDs");

	wake_up(&mdev->misc_wait);
	wake_up(&mdev->state_wait);
	wake_up(&mdev->tconn->ping_wait);

	/* aborted verify run. log the last position */
	if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) &&
	    ns.conn < C_CONNECTED) {
		mdev->ov_start_sector =
			BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left);
		dev_info(DEV, "Online Verify reached sector %llu\n",
			(unsigned long long)mdev->ov_start_sector);
	}

	if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) &&
	    (ns.conn == C_SYNC_TARGET  || ns.conn == C_SYNC_SOURCE)) {
		dev_info(DEV, "Syncer continues.\n");
		mdev->rs_paused += (long)jiffies
				  -(long)mdev->rs_mark_time[mdev->rs_last_mark];
		if (ns.conn == C_SYNC_TARGET)
			mod_timer(&mdev->resync_timer, jiffies);
	}

	if ((os.conn == C_SYNC_TARGET  || os.conn == C_SYNC_SOURCE) &&
	    (ns.conn == C_PAUSED_SYNC_T || ns.conn == C_PAUSED_SYNC_S)) {
		dev_info(DEV, "Resync suspended\n");
		mdev->rs_mark_time[mdev->rs_last_mark] = jiffies;
	}

	if (os.conn == C_CONNECTED &&
	    (ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T)) {
		unsigned long now = jiffies;
		int i;

		set_ov_position(mdev, ns.conn);
		mdev->rs_start = now;
		mdev->rs_last_events = 0;
		mdev->rs_last_sect_ev = 0;
		mdev->ov_last_oos_size = 0;
		mdev->ov_last_oos_start = 0;

		for (i = 0; i < DRBD_SYNC_MARKS; i++) {
			mdev->rs_mark_left[i] = mdev->ov_left;
			mdev->rs_mark_time[i] = now;
		}

		drbd_rs_controller_reset(mdev);

		if (ns.conn == C_VERIFY_S) {
			dev_info(DEV, "Starting Online Verify from sector %llu\n",
					(unsigned long long)mdev->ov_position);
			mod_timer(&mdev->resync_timer, jiffies);
		}
	}

	if (get_ldev(mdev)) {
		u32 mdf = mdev->ldev->md.flags & ~(MDF_CONSISTENT|MDF_PRIMARY_IND|
						 MDF_CONNECTED_IND|MDF_WAS_UP_TO_DATE|
						 MDF_PEER_OUT_DATED|MDF_CRASHED_PRIMARY);

		mdf &= ~MDF_AL_CLEAN;
		if (test_bit(CRASHED_PRIMARY, &mdev->flags))
			mdf |= MDF_CRASHED_PRIMARY;
		if (mdev->state.role == R_PRIMARY ||
		    (mdev->state.pdsk < D_INCONSISTENT && mdev->state.peer == R_PRIMARY))
			mdf |= MDF_PRIMARY_IND;
		if (mdev->state.conn > C_WF_REPORT_PARAMS)
			mdf |= MDF_CONNECTED_IND;
		if (mdev->state.disk > D_INCONSISTENT)
			mdf |= MDF_CONSISTENT;
		if (mdev->state.disk > D_OUTDATED)
			mdf |= MDF_WAS_UP_TO_DATE;
		if (mdev->state.pdsk <= D_OUTDATED && mdev->state.pdsk >= D_INCONSISTENT)
			mdf |= MDF_PEER_OUT_DATED;
		if (mdf != mdev->ldev->md.flags) {
			mdev->ldev->md.flags = mdf;
			drbd_md_mark_dirty(mdev);
		}
		if (os.disk < D_CONSISTENT && ns.disk >= D_CONSISTENT)
			drbd_set_ed_uuid(mdev, mdev->ldev->md.uuid[UI_CURRENT]);
		put_ldev(mdev);
	}

	/* Peer was forced D_UP_TO_DATE & R_PRIMARY, consider to resync */
	if (os.disk == D_INCONSISTENT && os.pdsk == D_INCONSISTENT &&
	    os.peer == R_SECONDARY && ns.peer == R_PRIMARY)
		set_bit(CONSIDER_RESYNC, &mdev->flags);

	/* Receiver should clean up itself */
	if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING)
		drbd_thread_stop_nowait(&mdev->tconn->receiver);

	/* Now the receiver finished cleaning up itself, it should die */
	if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE)
		drbd_thread_stop_nowait(&mdev->tconn->receiver);

	/* Upon network failure, we need to restart the receiver. */
	if (os.conn > C_WF_CONNECTION &&
	    ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT)
		drbd_thread_restart_nowait(&mdev->tconn->receiver);

	/* Resume AL writing if we get a connection */
	if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
		drbd_resume_al(mdev);

	ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC);
	if (ascw) {
		ascw->os = os;
		ascw->ns = ns;
		ascw->flags = flags;
		ascw->w.cb = w_after_state_ch;
		ascw->w.mdev = mdev;
		ascw->done = done;
		drbd_queue_work(&mdev->tconn->data.work, &ascw->w);
	} else {
		dev_err(DEV, "Could not kmalloc an ascw\n");
	}

	return rv;
}

static int w_after_state_ch(struct drbd_work *w, int unused)
{
	struct after_state_chg_work *ascw =
		container_of(w, struct after_state_chg_work, w);
	struct drbd_conf *mdev = w->mdev;

	after_state_ch(mdev, ascw->os, ascw->ns, ascw->flags);
	if (ascw->flags & CS_WAIT_COMPLETE) {
		D_ASSERT(ascw->done != NULL);
		complete(ascw->done);
	}
	kfree(ascw);

	return 0;
}

*mdev
, int rv
)
1101 dev_err(DEV
, "Writing the bitmap failed not starting resync.\n");
1102 _drbd_request_state(mdev
, NS(conn
, C_CONNECTED
), CS_VERBOSE
);
1106 switch (mdev
->state
.conn
) {
1107 case C_STARTING_SYNC_T
:
1108 _drbd_request_state(mdev
, NS(conn
, C_WF_SYNC_UUID
), CS_VERBOSE
);
1110 case C_STARTING_SYNC_S
:
1111 drbd_start_resync(mdev
, C_SYNC_SOURCE
);
int drbd_bitmap_io_from_worker(struct drbd_conf *mdev,
			       int (*io_fn)(struct drbd_conf *),
			       char *why, enum bm_flag flags)
{
	int rv;

	D_ASSERT(current == mdev->tconn->worker.task);

	/* open coded non-blocking drbd_suspend_io(mdev); */
	set_bit(SUSPEND_IO, &mdev->flags);

	drbd_bm_lock(mdev, why, flags);
	rv = io_fn(mdev);
	drbd_bm_unlock(mdev);

	drbd_resume_io(mdev);

	return rv;
}

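/*
 * Usage sketch (not part of the original source): the demote paths in
 * after_state_ch() below use this helper to flush bitmap pages from worker
 * context, e.g.
 *
 *	drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
 *				   "demote", BM_LOCKED_TEST_ALLOWED);
 */
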
/**
 * after_state_ch() - Perform after state change actions that may sleep
 * @mdev:	DRBD device.
 */
static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
			   union drbd_state ns, enum chg_state_flags flags)
{
	struct sib_info sib;

	sib.sib_reason = SIB_STATE_CHANGE;
	sib.os = os;
	sib.ns = ns;

	if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) {
		clear_bit(CRASHED_PRIMARY, &mdev->flags);
		if (mdev->p_uuid)
			mdev->p_uuid[UI_FLAGS] &= ~((u64)2);
	}

	/* Inform userspace about the change... */
	drbd_bcast_event(mdev, &sib);

	if (!(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE) &&
	    (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE))
		drbd_khelper(mdev, "pri-on-incon-degr");

	/* Here we have the actions that are performed after a
	   state change. This function might sleep */

	if (ns.susp_nod) {
		enum drbd_req_event what = NOTHING;

		if (os.conn < C_CONNECTED && conn_lowest_conn(mdev->tconn) >= C_CONNECTED)
			what = RESEND;

		if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) &&
		    conn_lowest_disk(mdev->tconn) > D_NEGOTIATING)
			what = RESTART_FROZEN_DISK_IO;

		if (what != NOTHING) {
			spin_lock_irq(&mdev->tconn->req_lock);
			_tl_restart(mdev->tconn, what);
			_drbd_set_state(_NS(mdev, susp_nod, 0), CS_VERBOSE, NULL);
			spin_unlock_irq(&mdev->tconn->req_lock);
		}
	}

	/* Became sync source.  With protocol >= 96, we still need to send out
	 * the sync uuid now. Need to do that before any drbd_send_state, or
	 * the other side may go "paused sync" before receiving the sync uuids,
	 * which is unexpected. */
	if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) &&
	    (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) &&
	    mdev->tconn->agreed_pro_version >= 96 && get_ldev(mdev)) {
		drbd_gen_and_send_sync_uuid(mdev);
		put_ldev(mdev);
	}

	/* Do not change the order of the if above and the two below... */
	if (os.pdsk == D_DISKLESS &&
	    ns.pdsk > D_DISKLESS && ns.pdsk != D_UNKNOWN) {      /* attach on the peer */
		drbd_send_uuids(mdev);
		drbd_send_state(mdev, ns);
	}
	/* No point in queuing send_bitmap if we don't have a connection
	 * anymore, so check also the _current_ state, not only the new state
	 * at the time this work was queued. */
	if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S &&
	    mdev->state.conn == C_WF_BITMAP_S)
		drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL,
				"send_bitmap (WFBitMapS)",
				BM_LOCKED_TEST_ALLOWED);

	/* Lost contact to peer's copy of the data */
	if ((os.pdsk >= D_INCONSISTENT &&
	     os.pdsk != D_UNKNOWN &&
	     os.pdsk != D_OUTDATED)
	&&  (ns.pdsk < D_INCONSISTENT ||
	     ns.pdsk == D_UNKNOWN ||
	     ns.pdsk == D_OUTDATED)) {
		if (get_ldev(mdev)) {
			if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) &&
			    mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
				if (drbd_suspended(mdev)) {
					set_bit(NEW_CUR_UUID, &mdev->flags);
				} else {
					drbd_uuid_new_current(mdev);
					drbd_send_uuids(mdev);
				}
			}
			put_ldev(mdev);
		}
	}

	if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) {
		if (os.peer == R_SECONDARY && ns.peer == R_PRIMARY &&
		    mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
			drbd_uuid_new_current(mdev);
			drbd_send_uuids(mdev);
		}
		/* D_DISKLESS Peer becomes secondary */
		if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
			/* We may still be Primary ourselves.
			 * No harm done if the bitmap still changes,
			 * redirtied pages will follow later. */
			drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
				"demote diskless peer", BM_LOCKED_SET_ALLOWED);
		put_ldev(mdev);
	}

	/* Write out all changed bits on demote.
	 * Though, no need to do that just yet
	 * if there is a resync going on still */
	if (os.role == R_PRIMARY && ns.role == R_SECONDARY &&
		mdev->state.conn <= C_CONNECTED && get_ldev(mdev)) {
		/* No changes to the bitmap expected this time, so assert that,
		 * even though no harm was done if it did change. */
		drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
				"demote", BM_LOCKED_TEST_ALLOWED);
		put_ldev(mdev);
	}

	/* Last part of the attaching process ... */
	if (ns.conn >= C_CONNECTED &&
	    os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) {
		drbd_send_sizes(mdev, 0, 0);  /* to start sync... */
		drbd_send_uuids(mdev);
		drbd_send_state(mdev, ns);
	}

	/* We want to pause/continue resync, tell peer. */
	if (ns.conn >= C_CONNECTED &&
	     ((os.aftr_isp != ns.aftr_isp) ||
	      (os.user_isp != ns.user_isp)))
		drbd_send_state(mdev, ns);

	/* In case one of the isp bits got set, suspend other devices. */
	if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) &&
	    (ns.aftr_isp || ns.peer_isp || ns.user_isp))
		suspend_other_sg(mdev);

	/* Make sure the peer gets informed about eventual state
	   changes (ISP bits) while we were in WFReportParams. */
	if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED)
		drbd_send_state(mdev, ns);

	if (os.conn != C_AHEAD && ns.conn == C_AHEAD)
		drbd_send_state(mdev, ns);

	/* We are in the progress to start a full sync... */
	if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
	    (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S))
		/* no other bitmap changes expected during this phase */
		drbd_queue_bitmap_io(mdev,
			&drbd_bmio_set_n_write, &abw_start_sync,
			"set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED);

	/* We are invalidating our self... */
	if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED &&
	    os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT)
		/* other bitmap operation expected during this phase */
		drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL,
			"set_n_write from invalidate", BM_LOCKED_MASK);

	/* first half of local IO error, failure to attach,
	 * or administrative detach */
	if (os.disk != D_FAILED && ns.disk == D_FAILED) {
		enum drbd_io_error_p eh;
		int was_io_error;
		/* corresponding get_ldev was in __drbd_set_state, to serialize
		 * our cleanup here with the transition to D_DISKLESS,
		 * so it is safe to dereference ldev here. */
		rcu_read_lock();
		eh = rcu_dereference(mdev->ldev->disk_conf)->on_io_error;
		rcu_read_unlock();
		was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags);

		/* Immediately allow completion of all application IO, that waits
		   for completion from the local disk. */
		tl_abort_disk_io(mdev);

		/* current state still has to be D_FAILED,
		 * there is only one way out: to D_DISKLESS,
		 * and that may only happen after our put_ldev below. */
		if (mdev->state.disk != D_FAILED)
			dev_err(DEV,
				"ASSERT FAILED: disk is %s during detach\n",
				drbd_disk_str(mdev->state.disk));

		drbd_send_state(mdev, ns);
		drbd_rs_cancel_all(mdev);

		/* In case we want to get something to stable storage still,
		 * this may be the last chance.
		 * Following put_ldev may transition to D_DISKLESS. */
		drbd_md_sync(mdev);
		put_ldev(mdev);

		if (was_io_error && eh == EP_CALL_HELPER)
			drbd_khelper(mdev, "local-io-error");
	}

	/* second half of local IO error, failure to attach,
	 * or administrative detach,
	 * after local_cnt references have reached zero again */
	if (os.disk != D_DISKLESS && ns.disk == D_DISKLESS) {
		/* We must still be diskless,
		 * re-attach has to be serialized with this! */
		if (mdev->state.disk != D_DISKLESS)
			dev_err(DEV,
				"ASSERT FAILED: disk is %s while going diskless\n",
				drbd_disk_str(mdev->state.disk));

		mdev->rs_total = 0;
		mdev->rs_failed = 0;
		atomic_set(&mdev->rs_pending_cnt, 0);

		drbd_send_state(mdev, ns);
		/* corresponding get_ldev in __drbd_set_state
		 * this may finally trigger drbd_ldev_destroy. */
		put_ldev(mdev);
	}

	/* Notify peer that I had a local IO error, and did not detach.. */
	if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT)
		drbd_send_state(mdev, ns);

	/* Disks got bigger while they were detached */
	if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING &&
	    test_and_clear_bit(RESYNC_AFTER_NEG, &mdev->flags)) {
		if (ns.conn == C_CONNECTED)
			resync_after_online_grow(mdev);
	}

	/* A resync finished or aborted, wake paused devices... */
	if ((os.conn > C_CONNECTED && ns.conn <= C_CONNECTED) ||
	    (os.peer_isp && !ns.peer_isp) ||
	    (os.user_isp && !ns.user_isp))
		resume_next_sg(mdev);

	/* sync target done with resync.  Explicitly notify peer, even though
	 * it should (at least for non-empty resyncs) already know itself. */
	if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
		drbd_send_state(mdev, ns);

	/* This triggers bitmap writeout of potentially still unwritten pages
	 * if the resync finished cleanly, or aborted because of peer disk
	 * failure, or because of connection loss.
	 * For resync aborted because of local disk failure, we cannot do
	 * any bitmap writeout anymore.
	 * No harm done if some bits change during this phase.
	 */
	if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) {
		drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL,
			"write from resync_finished", BM_LOCKED_SET_ALLOWED);
		put_ldev(mdev);
	}

	if (ns.disk == D_DISKLESS &&
	    ns.conn == C_STANDALONE &&
	    ns.role == R_SECONDARY) {
		if (os.aftr_isp != ns.aftr_isp)
			resume_next_sg(mdev);
	}

	drbd_md_sync(mdev);
}

struct after_conn_state_chg_work {
	struct drbd_work w;
	enum drbd_conns oc;
	union drbd_state ns_min;
	union drbd_state ns_max; /* new, max state, over all mdevs */
	enum chg_state_flags flags;
};

static int w_after_conn_state_ch(struct drbd_work *w, int unused)
{
	struct after_conn_state_chg_work *acscw =
		container_of(w, struct after_conn_state_chg_work, w);
	struct drbd_tconn *tconn = w->tconn;
	enum drbd_conns oc = acscw->oc;
	union drbd_state ns_max = acscw->ns_max;
	union drbd_state ns_min = acscw->ns_min;
	struct drbd_conf *mdev;
	int vnr;

	kfree(acscw);

	/* Upon network configuration, we need to start the receiver */
	if (oc == C_STANDALONE && ns_max.conn == C_UNCONNECTED)
		drbd_thread_start(&tconn->receiver);

	if (oc == C_DISCONNECTING && ns_max.conn == C_STANDALONE) {
		struct net_conf *old_conf;

		mutex_lock(&tconn->conf_update);
		old_conf = tconn->net_conf;
		tconn->my_addr_len = 0;
		tconn->peer_addr_len = 0;
		rcu_assign_pointer(tconn->net_conf, NULL);
		conn_free_crypto(tconn);
		mutex_unlock(&tconn->conf_update);

		synchronize_rcu();
		kfree(old_conf);
	}

	if (ns_max.susp_fen) {
		/* case1: The outdate peer handler is successful: */
		if (ns_max.pdsk <= D_OUTDATED) {
			rcu_read_lock();
			idr_for_each_entry(&tconn->volumes, mdev, vnr) {
				if (test_bit(NEW_CUR_UUID, &mdev->flags)) {
					drbd_uuid_new_current(mdev);
					clear_bit(NEW_CUR_UUID, &mdev->flags);
				}
			}
			rcu_read_unlock();
			conn_request_state(tconn,
					   (union drbd_state) { { .susp_fen = 1 } },
					   (union drbd_state) { { .susp_fen = 0 } },
					   CS_VERBOSE);
		}
		/* case2: The connection was established again: */
		if (ns_min.conn >= C_CONNECTED) {
			rcu_read_lock();
			idr_for_each_entry(&tconn->volumes, mdev, vnr)
				clear_bit(NEW_CUR_UUID, &mdev->flags);
			rcu_read_unlock();
			spin_lock_irq(&tconn->req_lock);
			_tl_restart(tconn, RESEND);
			_conn_request_state(tconn,
					    (union drbd_state) { { .susp_fen = 1 } },
					    (union drbd_state) { { .susp_fen = 0 } },
					    CS_VERBOSE);
			spin_unlock_irq(&tconn->req_lock);
		}
	}
	kref_put(&tconn->kref, &conn_destroy);
	return 0;
}

void conn_old_common_state(struct drbd_tconn *tconn, union drbd_state *pcs, enum chg_state_flags *pf)
{
	enum chg_state_flags flags = ~0;
	struct drbd_conf *mdev;
	int vnr, first_vol = 1;
	union drbd_dev_state os, cs = {
		{ .role = R_SECONDARY,
		  .peer = R_UNKNOWN,
		  .conn = tconn->cstate,
		  .disk = D_DISKLESS,
		  .pdsk = D_UNKNOWN,
		} };

	rcu_read_lock();
	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
		os = mdev->state;

		if (first_vol) {
			cs = os;
			first_vol = 0;
			continue;
		}

		if (cs.role != os.role)
			flags &= ~CS_DC_ROLE;

		if (cs.peer != os.peer)
			flags &= ~CS_DC_PEER;

		if (cs.conn != os.conn)
			flags &= ~CS_DC_CONN;

		if (cs.disk != os.disk)
			flags &= ~CS_DC_DISK;

		if (cs.pdsk != os.pdsk)
			flags &= ~CS_DC_PDSK;
	}
	rcu_read_unlock();

	*pf |= CS_DC_MASK;
	*pf &= flags;
	(*pcs).i = cs.i;
}

static enum drbd_state_rv
conn_is_valid_transition(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val,
			 enum chg_state_flags flags)
{
	enum drbd_state_rv rv = SS_SUCCESS;
	union drbd_state ns, os;
	struct drbd_conf *mdev;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
		os = drbd_read_state(mdev);
		ns = sanitize_state(mdev, apply_mask_val(os, mask, val), NULL);

		if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED)
			ns.disk = os.disk;

		if (ns.i == os.i)
			continue;

		rv = is_valid_transition(os, ns);
		if (rv < SS_SUCCESS)
			break;

		if (!(flags & CS_HARD)) {
			rv = is_valid_state(mdev, ns);
			if (rv < SS_SUCCESS) {
				if (is_valid_state(mdev, os) == rv)
					rv = is_valid_soft_transition(os, ns);
			} else
				rv = is_valid_soft_transition(os, ns);
		}
		if (rv < SS_SUCCESS)
			break;
	}
	rcu_read_unlock();

	if (rv < SS_SUCCESS && flags & CS_VERBOSE)
		print_st_err(mdev, os, ns, rv);

	return rv;
}

static void
conn_set_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val,
	       union drbd_state *pns_min, union drbd_state *pns_max, enum chg_state_flags flags)
{
	union drbd_state ns, os, ns_max = { };
	union drbd_state ns_min = {
		{ .role = R_MASK,
		  .peer = R_MASK,
		  .conn = C_MASK,
		  .disk = D_MASK,
		  .pdsk = D_MASK
		} };
	struct drbd_conf *mdev;
	enum drbd_state_rv rv;
	int vnr, number_of_volumes = 0;

	if (mask.conn == C_MASK)
		tconn->cstate = val.conn;

	rcu_read_lock();
	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
		number_of_volumes++;
		os = drbd_read_state(mdev);
		ns = apply_mask_val(os, mask, val);
		ns = sanitize_state(mdev, ns, NULL);

		if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED)
			ns.disk = os.disk;

		rv = __drbd_set_state(mdev, ns, flags, NULL);
		if (rv < SS_SUCCESS)
			BUG();

		ns.i = mdev->state.i;
		ns_max.role = max_role(ns.role, ns_max.role);
		ns_max.peer = max_role(ns.peer, ns_max.peer);
		ns_max.conn = max_t(enum drbd_conns, ns.conn, ns_max.conn);
		ns_max.disk = max_t(enum drbd_disk_state, ns.disk, ns_max.disk);
		ns_max.pdsk = max_t(enum drbd_disk_state, ns.pdsk, ns_max.pdsk);

		ns_min.role = min_role(ns.role, ns_min.role);
		ns_min.peer = min_role(ns.peer, ns_min.peer);
		ns_min.conn = min_t(enum drbd_conns, ns.conn, ns_min.conn);
		ns_min.disk = min_t(enum drbd_disk_state, ns.disk, ns_min.disk);
		ns_min.pdsk = min_t(enum drbd_disk_state, ns.pdsk, ns_min.pdsk);
	}
	rcu_read_unlock();

	if (number_of_volumes == 0) {
		ns_min = ns_max = (union drbd_state) { {
				.role = R_SECONDARY,
				.peer = R_UNKNOWN,
				.conn = val.conn,
				.disk = D_DISKLESS,
				.pdsk = D_UNKNOWN
			} };
	}

	ns_min.susp = ns_max.susp = tconn->susp;
	ns_min.susp_nod = ns_max.susp_nod = tconn->susp_nod;
	ns_min.susp_fen = ns_max.susp_fen = tconn->susp_fen;

	*pns_min = ns_min;
	*pns_max = ns_max;
}

static enum drbd_state_rv
_conn_rq_cond(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val)
{
	enum drbd_state_rv rv;

	if (test_and_clear_bit(CONN_WD_ST_CHG_OKAY, &tconn->flags))
		return SS_CW_SUCCESS;

	if (test_and_clear_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags))
		return SS_CW_FAILED_BY_PEER;

	spin_lock_irq(&tconn->req_lock);
	rv = tconn->cstate != C_WF_REPORT_PARAMS ? SS_CW_NO_NEED : SS_UNKNOWN_ERROR;

	if (rv == SS_UNKNOWN_ERROR)
		rv = conn_is_valid_transition(tconn, mask, val, 0);

	if (rv == SS_SUCCESS)
		rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */

	spin_unlock_irq(&tconn->req_lock);

	return rv;
}

static enum drbd_state_rv
conn_cl_wide(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val,
	     enum chg_state_flags f)
{
	enum drbd_state_rv rv;

	spin_unlock_irq(&tconn->req_lock);
	mutex_lock(&tconn->cstate_mutex);

	if (conn_send_state_req(tconn, mask, val)) {
		rv = SS_CW_FAILED_BY_PEER;
		/* if (f & CS_VERBOSE)
		   print_st_err(mdev, os, ns, rv); */
		goto abort;
	}

	wait_event(tconn->ping_wait, (rv = _conn_rq_cond(tconn, mask, val)));

abort:
	mutex_unlock(&tconn->cstate_mutex);
	spin_lock_irq(&tconn->req_lock);

	return rv;
}

enum drbd_state_rv
_conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val,
		    enum chg_state_flags flags)
{
	enum drbd_state_rv rv = SS_SUCCESS;
	struct after_conn_state_chg_work *acscw;
	enum drbd_conns oc = tconn->cstate;
	union drbd_state ns_max, ns_min, os;

	rv = is_valid_conn_transition(oc, val.conn);
	if (rv < SS_SUCCESS)
		goto abort;

	rv = conn_is_valid_transition(tconn, mask, val, flags);
	if (rv < SS_SUCCESS)
		goto abort;

	if (oc == C_WF_REPORT_PARAMS && val.conn == C_DISCONNECTING &&
	    !(flags & (CS_LOCAL_ONLY | CS_HARD))) {
		rv = conn_cl_wide(tconn, mask, val, flags);
		if (rv < SS_SUCCESS)
			goto abort;
	}

	conn_old_common_state(tconn, &os, &flags);
	flags |= CS_DC_SUSP;
	conn_set_state(tconn, mask, val, &ns_min, &ns_max, flags);
	conn_pr_state_change(tconn, os, ns_max, flags);

	acscw = kmalloc(sizeof(*acscw), GFP_ATOMIC);
	if (acscw) {
		acscw->oc = os.conn;
		acscw->ns_min = ns_min;
		acscw->ns_max = ns_max;
		acscw->flags = flags;
		acscw->w.cb = w_after_conn_state_ch;
		kref_get(&tconn->kref);
		acscw->w.tconn = tconn;
		drbd_queue_work(&tconn->data.work, &acscw->w);
	} else {
		conn_err(tconn, "Could not kmalloc an acscw\n");
	}

abort:
	return rv;
}

enum drbd_state_rv
conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val,
		   enum chg_state_flags flags)
{
	enum drbd_state_rv rv;

	spin_lock_irq(&tconn->req_lock);
	rv = _conn_request_state(tconn, mask, val, flags);
	spin_unlock_irq(&tconn->req_lock);

	return rv;
}