]> git.proxmox.com Git - mirror_ovs.git/blob - ovn/utilities/ovndb-servers.ocf
ovn-architecture: Add notes on L3 gateway HA.
[mirror_ovs.git] / ovn / utilities / ovndb-servers.ocf
#!/bin/bash
#
# OCF resource agent for the OVN ovsdb-servers.
# This preamble resolves the agent configuration from the OCF environment
# before any action handler is invoked.

# Load the OCF shell helper library, honouring a pre-set OCF_FUNCTIONS_DIR.
: "${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}"
. "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs"

# Built-in defaults; each one is only applied when not already set.
: "${OVN_CTL_DEFAULT=/usr/share/openvswitch/scripts/ovn-ctl}"
: "${NB_MASTER_PORT_DEFAULT=6641}"
: "${NB_MASTER_PROTO_DEFAULT=tcp}"
: "${SB_MASTER_PORT_DEFAULT=6642}"
: "${SB_MASTER_PROTO_DEFAULT=tcp}"
: "${MANAGE_NORTHD_DEFAULT=no}"

# Cluster tooling command lines. They are expanded unquoted at the call
# sites so that the embedded options are split into separate words.
CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot"
CRM_ATTR_REPL_INFO="${HA_SBIN_DIR}/crm_attribute --type crm_config --name OVN_REPL_INFO -s ovn_ovsdb_master_server"

# Effective settings: the resource parameter (OCF_RESKEY_*) wins, otherwise
# the default above is used. master_ip has no default.
OVN_CTL="${OCF_RESKEY_ovn_ctl:-${OVN_CTL_DEFAULT}}"
MASTER_IP="${OCF_RESKEY_master_ip}"
NB_MASTER_PORT="${OCF_RESKEY_nb_master_port:-${NB_MASTER_PORT_DEFAULT}}"
NB_MASTER_PROTO="${OCF_RESKEY_nb_master_protocol:-${NB_MASTER_PROTO_DEFAULT}}"
SB_MASTER_PORT="${OCF_RESKEY_sb_master_port:-${SB_MASTER_PORT_DEFAULT}}"
SB_MASTER_PROTO="${OCF_RESKEY_sb_master_protocol:-${SB_MASTER_PROTO_DEFAULT}}"
MANAGE_NORTHD="${OCF_RESKEY_manage_northd:-${MANAGE_NORTHD_DEFAULT}}"

# An address that can never exist in the network (see RFC 5737). The ovsdb
# servers are pointed at it until a master has been promoted and the
# IPAddr2 resource has been started.
INVALID_IP_ADDRESS=192.0.2.254

# Name of the local cluster node (helper presumably provided by
# ocf-shellfuncs) and the promotion scores used for slaves and the master.
host_name="$(ocf_local_nodename)"
: "${slave_score=5}"
: "${master_score=10}"
29
# Print the OCF resource-agent metadata (XML) on stdout and exit the agent
# with OCF_SUCCESS.
#
# Globals read: OVN_CTL_DEFAULT, NB_MASTER_PORT_DEFAULT,
#               NB_MASTER_PROTO_DEFAULT, SB_MASTER_PORT_DEFAULT,
#               SB_MASTER_PROTO_DEFAULT, MANAGE_NORTHD_DEFAULT, OCF_SUCCESS
#
# Every parameter for which the agent applies a built-in default advertises
# that default in its <content> element, so that cluster tooling shows the
# value that is actually used. master_ip has no default.
#
# NOTE: this function terminates the calling shell (exit, not return).
ovsdb_server_metadata() {
    cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="ovsdb-server">
<version>1.0</version>

<longdesc lang="en">
This resource manages ovsdb-server.
</longdesc>

<shortdesc lang="en">
Manages ovsdb-server.
</shortdesc>

<parameters>

<parameter name="ovn_ctl" unique="1">
<longdesc lang="en">
Location to the ovn-ctl script file
</longdesc>
<shortdesc lang="en">ovn-ctl script</shortdesc>
<content type="string" default="${OVN_CTL_DEFAULT}" />
</parameter>

<parameter name="master_ip" unique="1">
<longdesc lang="en">
The IP address resource which will be available on the master ovsdb server
</longdesc>
<shortdesc lang="en">master ip address</shortdesc>
<content type="string" />
</parameter>

<parameter name="nb_master_port" unique="1">
<longdesc lang="en">
The port which the master Northbound database server is listening
</longdesc>
<shortdesc lang="en">master Northbound database port</shortdesc>
<content type="string" default="${NB_MASTER_PORT_DEFAULT}" />
</parameter>

<parameter name="nb_master_protocol" unique="1">
<longdesc lang="en">
The protocol which the master Northbound database server used, 'tcp' or 'ssl'.
</longdesc>
<shortdesc lang="en">master Northbound database protocol</shortdesc>
<content type="string" default="${NB_MASTER_PROTO_DEFAULT}" />
</parameter>

<parameter name="sb_master_port" unique="1">
<longdesc lang="en">
The port which the master Southbound database server is listening
</longdesc>
<shortdesc lang="en">master Southbound database port</shortdesc>
<content type="string" default="${SB_MASTER_PORT_DEFAULT}" />
</parameter>

<parameter name="sb_master_protocol" unique="1">
<longdesc lang="en">
The protocol which the master Southbound database server used, 'tcp' or 'ssl'.
</longdesc>
<shortdesc lang="en">master Southbound database protocol</shortdesc>
<content type="string" default="${SB_MASTER_PROTO_DEFAULT}" />
</parameter>

<parameter name="manage_northd" unique="1">
<longdesc lang="en">
If set to yes, manages ovn-northd service. ovn-northd will be started in
the master node.
</longdesc>
<shortdesc lang="en">manage ovn-northd service</shortdesc>
<content type="string" default="${MANAGE_NORTHD_DEFAULT}" />
</parameter>

</parameters>

<actions>
<action name="notify" timeout="20s" />
<action name="start" timeout="30s" />
<action name="stop" timeout="20s" />
<action name="promote" timeout="50s" />
<action name="demote" timeout="50s" />
<action name="monitor" timeout="20s" depth="0" interval="10s" />
<action name="meta-data" timeout="5s" />
<action name="validate-all" timeout="20s" />
</actions>
</resource-agent>
END
    exit $OCF_SUCCESS
}
120
# Handle a cluster notification (requires the notify=true meta resource
# attribute). Only "post-promote" events are acted upon; every other event
# returns OCF_SUCCESS immediately.
#
# After a promotion:
#   - on the promoted node, the node records itself as master in the cluster
#     configuration and, when MANAGE_NORTHD is "yes", starts ovn-northd;
#   - on every other node, ovn-northd is stopped (when managed) and both
#     databases are re-pointed at MASTER_IP to sync from the new master.
#
# Returns the exit status of the last command run in the selected branch.
ovsdb_server_notify() {
    local event="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}"
    local promoted_node="${OCF_RESKEY_CRM_meta_notify_promote_uname}"

    if [[ "${event}" != "post-promote" ]]; then
        # We are only interested in specific events
        return $OCF_SUCCESS
    fi

    ocf_log debug "ovndb_server: notified of event ${event}"

    if [[ "x${promoted_node}" != "x${host_name}" ]]; then
        # Another node was promoted: this node must follow it.
        if [[ "${MANAGE_NORTHD}" == "yes" ]]; then
            # --ovn-manage-ovsdb=no keeps ovn-ctl from stopping the
            # ovsdb-servers along with ovn-northd.
            ${OVN_CTL} --ovn-manage-ovsdb=no stop_northd
        fi

        # Synchronize with the new master
        ocf_log debug "ovndb_server: Connecting to the new master ${promoted_node}"
        ${OVN_CTL} demote_ovnnb \
            "--db-nb-sync-from-addr=${MASTER_IP}" \
            "--db-nb-sync-from-port=${NB_MASTER_PORT}" \
            "--db-nb-sync-from-proto=${NB_MASTER_PROTO}"
        ${OVN_CTL} demote_ovnsb \
            "--db-sb-sync-from-addr=${MASTER_IP}" \
            "--db-sb-sync-from-port=${SB_MASTER_PORT}" \
            "--db-sb-sync-from-proto=${SB_MASTER_PROTO}"
    else
        # This node was promoted. Record ourselves so that the agent has a
        # better chance of doing the right thing at startup.
        ocf_log debug "ovndb_server: $host_name is the master"
        ${CRM_ATTR_REPL_INFO} -v "$host_name"
        if [[ "${MANAGE_NORTHD}" == "yes" ]]; then
            # Startup ovn-northd service
            ${OVN_CTL} --ovn-manage-ovsdb=no start_northd
        fi
    fi
}
157
# Print the list of supported actions on stdout and exit the agent.
#
# Arguments: $1 - exit status to terminate with (the dispatcher passes
#                 OCF_SUCCESS for usage/help and OCF_ERR_UNIMPLEMENTED for
#                 unknown actions); defaults to 0 when omitted.
#
# The action list includes promote and demote, which the agent implements.
#
# NOTE: this function terminates the calling shell (exit, not return).
ovsdb_server_usage() {
    cat <<END
usage: $0 {start|stop|promote|demote|status|monitor|notify|validate-all|meta-data}

Expects to have a fully populated OCF RA-compliant environment set.
END
    exit "${1:-0}"
}
166
# Print the name of the node that is (or is expected to become) the active
# master, or nothing when no such node can be determined.
#
# Operation sequence is Demote -> Stop -> Start -> Promote. At the point
# this is run, the only active masters will be previous masters minus any
# that were scheduled to be demoted.
#
# Globals read: OCF_RESKEY_CRM_meta_notify_master_uname,
#               OCF_RESKEY_CRM_meta_notify_demote_uname,
#               OCF_RESKEY_CRM_meta_notify_start_uname (whitespace-separated
#               node lists), CRM_ATTR_REPL_INFO
# Outputs:      the master's node name on stdout (at most one line)
# Returns:      0
ovsdb_server_find_active_master() {
    local master old found node expected_master

    # The node lists are intentionally expanded unquoted: they are
    # whitespace-separated and must be split into individual names.
    for master in ${OCF_RESKEY_CRM_meta_notify_master_uname}; do
        found=0
        for old in ${OCF_RESKEY_CRM_meta_notify_demote_uname}; do
            if [ "$master" = "$old" ]; then
                found=1
                break
            fi
        done
        if [ "$found" = 0 ]; then
            # Rely on master-max=1
            # Pacemaker will demote any additional ones it finds before
            # starting new copies
            echo "$master"
            return 0
        fi
    done

    # No surviving master: fall back to the master recorded in the cluster
    # configuration, but only if that node is among the ones being started.
    # A failing or empty query simply means "no recorded master".
    expected_master=$($CRM_ATTR_REPL_INFO --query -q 2>/dev/null)
    if [ -z "$expected_master" ]; then
        return 0
    fi

    # Compare whole node names; a substring match would confuse e.g.
    # "node1" with "node10".
    for node in ${OCF_RESKEY_CRM_meta_notify_start_uname}; do
        if [ "$node" = "$expected_master" ]; then
            # The previous master is expected to start
            echo "$expected_master"
            return 0
        fi
    done
    return 0
}
192
# Print the name of the first slave peer that is not scheduled to be
# stopped, or nothing when every known slave is stopping.
#
# Globals read: OCF_RESKEY_CRM_meta_notify_slave_uname,
#               OCF_RESKEY_CRM_meta_notify_stop_uname (whitespace-separated
#               node lists)
# Outputs:      one node name on stdout, or nothing
# Returns:      0
ovsdb_server_find_active_peers() {
    # Keep the loop state local so that callers' variables are not clobbered.
    local peer old found

    # Do we have any peers that are not stopping?
    # The node lists are intentionally expanded unquoted so that they are
    # split into individual names.
    for peer in ${OCF_RESKEY_CRM_meta_notify_slave_uname}; do
        found=0
        for old in ${OCF_RESKEY_CRM_meta_notify_stop_uname}; do
            if [ "$peer" = "$old" ]; then
                found=1
                break
            fi
        done
        if [ "$found" = 0 ]; then
            # Rely on master-max=1
            # Pacemaker will demote any additional ones it finds before
            # starting new copies
            echo "$peer"
            return 0
        fi
    done
    return 0
}
210
# Publish this node's promotion score according to the given status.
#
# Arguments: $1 - a status code; OCF_SUCCESS selects slave_score and
#                 OCF_RUNNING_MASTER selects master_score. Any other value
#                 leaves the score untouched.
# Globals read: CRM_MASTER (expanded unquoted so its options are split),
#               slave_score, master_score
# Returns: the status of the CRM_MASTER call, or 0 when nothing was done.
ovsdb_server_master_update() {
    local state="$1"

    if [ "${state}" = "${OCF_SUCCESS}" ]; then
        $CRM_MASTER -v "${slave_score}"
    elif [ "${state}" = "${OCF_RUNNING_MASTER}" ]; then
        $CRM_MASTER -v "${master_score}"
    fi
    # Deleting the score for other states ($CRM_MASTER -D) is intentionally
    # not done.
}
221
# Monitor action: determine the current state of the ovsdb-servers, refresh
# the node's promotion score to match it, and report the state.
#
# Globals written: rc (deliberately not local, as in the rest of the agent)
# Returns: the status reported by ovsdb_server_check_status.
ovsdb_server_monitor() {
    ovsdb_server_check_status
    rc=$?

    # Keep the promotion score in step with what was just observed.
    ovsdb_server_master_update "${rc}"

    return "${rc}"
}
229
# Query ovn-ctl for the state of the southbound and northbound databases
# and translate the pair into an OCF status code.
#
# Returns:
#   OCF_SUCCESS         both databases report "running/backup"
#   OCF_RUNNING_MASTER  both databases report "running/active"
#   OCF_NOT_RUNNING     anything else
ovsdb_server_check_status() {
    local sb_state nb_state

    sb_state=$(${OVN_CTL} status_ovnsb)
    nb_state=$(${OVN_CTL} status_ovnnb)

    case "${sb_state}|${nb_state}" in
        "running/backup|running/backup")
            return $OCF_SUCCESS
            ;;
        "running/active|running/active")
            return $OCF_RUNNING_MASTER
            ;;
    esac

    # TODO: What about service running but not in either state above?
    # Eg. a transient state where one db is "active" and the other
    # "backup"

    return $OCF_NOT_RUNNING
}
248
# Start action: bring up the ovsdb-servers via ovn-ctl and wait until they
# report a functional state.
#
# The servers are started so that they
#   - point at the unreachable INVALID_IP_ADDRESS when no master is known
#     (every copy comes up as a slave and pacemaker picks one to promote),
#   - sync from MASTER_IP when another node is the master, or
#   - get no sync source at all when this node itself is the master.
#
# Globals written: rc
# Returns:
#   OCF_SUCCESS      the servers are running (as backup or as active)
#   OCF_ERR_GENERIC  the monitor reported a generic error
# Otherwise the function keeps polling once per second until the cluster
# times the operation out.
ovsdb_server_start() {
    local status present_master
    local -a ctl_cmd

    ovsdb_server_check_status
    status=$?
    # If not in stopped state there is nothing to start.
    if [ "${status}" -ne "${OCF_NOT_RUNNING}" ]; then
        # Only the monitor action may report OCF_RUNNING_MASTER; an already
        # active server is a successful start from pacemaker's point of view.
        if [ "${status}" -eq "${OCF_RUNNING_MASTER}" ]; then
            return "${OCF_SUCCESS}"
        fi
        return "${status}"
    fi

    present_master=$(ovsdb_server_find_active_master)

    # OVN_CTL is split on whitespace on purpose, matching every other call
    # site in the agent; all further arguments are kept as single words.
    ctl_cmd=( ${OVN_CTL} )
    ctl_cmd+=( "--db-nb-addr=${MASTER_IP}" "--db-nb-port=${NB_MASTER_PORT}" )
    ctl_cmd+=( "--db-sb-addr=${MASTER_IP}" "--db-sb-port=${SB_MASTER_PORT}" )

    if [ "x${NB_MASTER_PROTO}" = xtcp ]; then
        ctl_cmd+=( "--db-nb-create-insecure-remote=yes" )
    fi

    if [ "x${SB_MASTER_PROTO}" = xtcp ]; then
        ctl_cmd+=( "--db-sb-create-insecure-remote=yes" )
    fi

    if [ -z "${present_master}" ]; then
        # No master detected, or the previous master is not among the
        # set starting.
        #
        # Force all copies to come up as slaves by pointing them into
        # space and let pacemaker pick one to promote:
        ctl_cmd+=( "--db-nb-sync-from-addr=${INVALID_IP_ADDRESS}" )
        ctl_cmd+=( "--db-sb-sync-from-addr=${INVALID_IP_ADDRESS}" )
    elif [ "${present_master}" != "${host_name}" ]; then
        # An existing master is active, connect to it
        ctl_cmd+=( "--db-nb-sync-from-addr=${MASTER_IP}" )
        ctl_cmd+=( "--db-sb-sync-from-addr=${MASTER_IP}" )
        ctl_cmd+=( "--db-nb-sync-from-port=${NB_MASTER_PORT}" )
        ctl_cmd+=( "--db-nb-sync-from-proto=${NB_MASTER_PROTO}" )
        ctl_cmd+=( "--db-sb-sync-from-port=${SB_MASTER_PORT}" )
        ctl_cmd+=( "--db-sb-sync-from-proto=${SB_MASTER_PROTO}" )
    fi

    "${ctl_cmd[@]}" start_ovsdb

    while true; do
        # It is important that we don't return until we're in a functional
        # state
        ovsdb_server_monitor
        rc=$?
        case "${rc}" in
            "${OCF_SUCCESS}")
                return "${rc}"
                ;;
            "${OCF_RUNNING_MASTER}")
                # When a slave node is promoted as master, the action would
                # be STOP -> START -> PROMOTE.
                # When the start action is called, it is possible for the
                # ovsdb-servers to be started as active. This could happen
                # if the node owns the $MASTER_IP. At this point, pacemaker
                # has not promoted this node yet. So return OCF_SUCCESS.
                # Let pacemaker promote it in subsequent actions.
                # As per the OCF guidelines, only monitor action should
                # return OCF_RUNNING_MASTER.
                # http://www.linux-ha.org/doc/dev-guides/_literal_ocf_running_master_literal_8.html
                return "${OCF_SUCCESS}"
                ;;
            "${OCF_ERR_GENERIC}")
                return "${rc}"
                ;;
            # Otherwise loop, waiting for the service to start, until
            # the cluster times the operation out
        esac
        ocf_log warn "ovndb_servers: After starting ovsdb, status is $rc. Checking the status again"
        # Pause between polls instead of spinning on ovn-ctl and the log.
        sleep 1
    done
}
317
# Stop action: stop ovn-northd (when managed) and the ovsdb-servers, and
# wait until the servers are really gone.
#
# Globals written: rc
# Returns:
#   OCF_SUCCESS  the servers are stopped (or were not running to begin with)
#   other        an unexpected status reported by ovsdb_server_check_status
# While the servers still report a running state (backup or active) the
# function keeps polling once per second until the cluster times the
# operation out.
ovsdb_server_stop() {
    if [ "${MANAGE_NORTHD}" = "yes" ]; then
        # Stop ovn-northd service in case it was running. This is required
        # when the master is demoted. For other cases, it would be a no-op.
        # Set --ovn-manage-ovsdb=no so that ovn-ctl doesn't stop
        # ovsdb-servers.
        ${OVN_CTL} --ovn-manage-ovsdb=no stop_northd
    fi

    ovsdb_server_check_status
    if [ $? -eq "${OCF_NOT_RUNNING}" ]; then
        return "${OCF_SUCCESS}"
    fi

    ${OVN_CTL} stop_ovsdb
    ovsdb_server_master_update "${OCF_NOT_RUNNING}"

    while true; do
        # It is important that we don't return until we're stopped
        ovsdb_server_check_status
        rc=$?
        case "${rc}" in
            "${OCF_NOT_RUNNING}")
                return "${OCF_SUCCESS}"
                ;;
            "${OCF_SUCCESS}"|"${OCF_RUNNING_MASTER}")
                # Still running, as backup or as active server. Loop,
                # waiting for the service to stop, until the cluster times
                # the operation out.
                ocf_log warn "ovndb_servers: Even after stopping, the servers seems to be running"
                # Pause between polls instead of spinning on ovn-ctl.
                sleep 1
                ;;
            *)
                return "${rc}"
                ;;
        esac
    done
}
355
# Promote action: turn the local backup ovsdb-servers into the active ones.
#
# Globals written: rc
# Returns:
#   OCF_SUCCESS  the servers were promoted, or were already active
#   other        the status from ovsdb_server_check_status when the servers
#                are neither running as backup nor as active (the master
#                score is still published in that case)
ovsdb_server_promote() {
    ovsdb_server_check_status
    rc=$?

    # Already active: nothing to do.
    if [ "${rc}" = "${OCF_RUNNING_MASTER}" ]; then
        return ${OCF_SUCCESS}
    fi

    # Not running as a backup either: report the observed status.
    if [ "${rc}" != "${OCF_SUCCESS}" ]; then
        ovsdb_server_master_update $OCF_RUNNING_MASTER
        return ${rc}
    fi

    ${OVN_CTL} promote_ovnnb
    ${OVN_CTL} promote_ovnsb

    ocf_log debug "ovndb_servers: Promoting $host_name as the master"
    # Record ourselves so that the agent has a better chance of doing
    # the right thing at startup
    ${CRM_ATTR_REPL_INFO} -v "$host_name"
    ovsdb_server_master_update $OCF_RUNNING_MASTER
    return $OCF_SUCCESS
}
378
# Demote action: turn the local ovsdb-servers into backups.
#
# The sync source handed to ovn-ctl depends on who the active master is:
#   - another node is the active master: there are too many masters and we
#     are an extra one being demoted, so sync to the surviving one through
#     MASTER_IP;
#   - in every other case (we are the one and only master, the active
#     master is ourselves, or nothing consistent is known): point at
#     INVALID_IP_ADDRESS. The local database is only reset once we
#     successfully connect to a peer, so a peer that does not exist keeps
#     the data intact; never allow sync'ing from ourselves, it is a great
#     way to erase the local DB. Eventually a new master will be promoted
#     and we'll resync using the logic in ovsdb_server_notify().
#
# Returns:
#   OCF_NOT_RUNNING  the servers are not running
#   OCF_SUCCESS      otherwise (the slave score is published first)
ovsdb_server_demote() {
    local active_master known_master
    local -a nb_sync sb_sync

    ovsdb_server_check_status
    if [ $? = $OCF_NOT_RUNNING ]; then
        return $OCF_NOT_RUNNING
    fi

    active_master=$(ovsdb_server_find_active_master)
    # The recorded master is only reported in the debug message below.
    known_master=$($CRM_ATTR_REPL_INFO --query -q 2>/dev/null)

    ocf_log debug "ovndb_servers: Demoting $host_name, present master ${active_master}, recorded master ${known_master}"

    if [ -n "${active_master}" ] && [ "x${active_master}" != "x${host_name}" ]; then
        nb_sync=(
            "--db-nb-sync-from-addr=${MASTER_IP}"
            "--db-nb-sync-from-port=${NB_MASTER_PORT}"
            "--db-nb-sync-from-proto=${NB_MASTER_PROTO}"
        )
        sb_sync=(
            "--db-sb-sync-from-addr=${MASTER_IP}"
            "--db-sb-sync-from-port=${SB_MASTER_PORT}"
            "--db-sb-sync-from-proto=${SB_MASTER_PROTO}"
        )
    else
        nb_sync=( "--db-nb-sync-from-addr=${INVALID_IP_ADDRESS}" )
        sb_sync=( "--db-sb-sync-from-addr=${INVALID_IP_ADDRESS}" )
    fi

    ${OVN_CTL} demote_ovnnb "${nb_sync[@]}"
    ${OVN_CTL} demote_ovnsb "${sb_sync[@]}"

    ovsdb_server_master_update $OCF_SUCCESS
    return $OCF_SUCCESS
}
431
# validate-all action: check that the configured ovn-ctl script can be run.
#
# Globals read: OVN_CTL
# Returns:
#   OCF_ERR_INSTALLED  OVN_CTL does not name an existing, executable file
#   OCF_SUCCESS        otherwise
ovsdb_server_validate() {
    # Quoted so that an odd path (e.g. one containing whitespace) is tested
    # as a single name instead of making the test itself fail. The script is
    # executed directly by the other actions, so it must be executable.
    if [ ! -x "${OVN_CTL}" ]; then
        return "${OCF_ERR_INSTALLED}"
    fi
    return "${OCF_SUCCESS}"
}
438
439
# Entry point: run the handler for the requested OCF action and exit with
# its status. Unknown actions print the usage text and exit with
# OCF_ERR_UNIMPLEMENTED (ovsdb_server_usage and ovsdb_server_metadata exit
# on their own).
case "${__OCF_ACTION}" in
    start)
        ovsdb_server_start
        ;;
    stop)
        ovsdb_server_stop
        ;;
    promote)
        ovsdb_server_promote
        ;;
    demote)
        ovsdb_server_demote
        ;;
    notify)
        ovsdb_server_notify
        ;;
    meta-data)
        ovsdb_server_metadata
        ;;
    validate-all)
        ovsdb_server_validate
        ;;
    status|monitor)
        ovsdb_server_monitor
        ;;
    usage|help)
        ovsdb_server_usage $OCF_SUCCESS
        ;;
    *)
        ovsdb_server_usage $OCF_ERR_UNIMPLEMENTED
        ;;
esac

# Hand the handler's status back to the cluster manager.
exit $?