]> git.proxmox.com Git - ceph.git/blame - ceph/src/tools/contrib/ceph-migrate-bluestore.bash
import quincy beta 17.1.0
[ceph.git] / ceph / src / tools / contrib / ceph-migrate-bluestore.bash
CommitLineData
20effc67
TL
1#!/bin/bash
2# https://tracker.ceph.com/issues/47839
3# Signed-off-by: Chris Dunlop <chris@onthe.net.au>
4
5
6######################################################################
#
# Print the command line help on stdout and exit with status 0.
#
usage() {
  # One array element per output line; the final empty element reproduces
  # the blank line that terminates the help text.
  local -a help_lines=(
    "Usage: $0 osd device"
    ''
    'Description:'
    ''
    'Migrate an OSD from Filestore to BlueStore'
    ''
    'Where:'
    ''
    'osd - OSD ID to migrate'
    'device - raw device to migrate to, starting with /dev/disk/by-id/'
    ''
    'E.g.:'
    ''
    'ceph-migrate-bluestore 6 /dev/disk/by-id/ata-WDC_WD80EFZX-68UW8N0_VK0RKXTY'
    ''
  )
  printf '%s\n' "${help_lines[@]}"
  exit 0
}
28######################################################################
29
#
# Strict mode: abort on unhandled errors, unset variables and failures
# anywhere in a pipeline.
#
set -o errexit -o nounset -o pipefail
shopt -s extglob failglob inherit_errexit lastpipe

#
# Print a message on stderr and abort the script with a failure status.
#
# Used throughout the script as:  some-check || error "explanation"
#
# Arguments: the message to print (all arguments are joined with spaces)
#
function error
{
  printf '%s: %s\n' "${0##*/}" "$*" >&2
  exit 1
}

#
# Exactly two arguments are required: the OSD id and the new device
#
[[ $# -eq 2 ]] || usage
osd=$1
bluestore_device=$2

[[ $osd =~ ^[0-9]+$ ]] || error 'osd must be numeric'

#
# The device must be an existing block device, named via its stable
# /dev/disk/by-id/ path, and must not be a partition (-partN suffix)
#
[[
  -b $bluestore_device &&
  $bluestore_device =~ ^/dev/disk/by-id/ &&
  ! $bluestore_device =~ -part[0-9]+$
]] || error "device must be a raw block device starting with /dev/disk/by-id/"
43
######################################################################
# Setup...
#

#
# Volume group holding the block.db LVs - it has to exist already
#
vgdb='vg-861d7200-578c-45c2-a44c-2f0c56427bf1'
if ! vgs "${vgdb}" >& /dev/null
then
  error "VG '${vgdb}' for block.db not found"
fi

#
# Size of the block.db LV created in $vgdb
#
dblvsize=60G

#
# Name prefix of the block LVs
#
block_prefix='osd-block'

#
# Some less(?) common commands we use - their presence is checked
# before any work starts so we can abort early if they're missing
#
cmds=(bc sgdisk)
71
72######################################################################
73# Functions...
74#
#
# Echo a command line on stderr, then run it.
#
# Arguments: the command and its arguments
# Returns:   the exit status of the command
#
runcmd() {
  # IFS stays local-but-set while the command runs, so "$*" joins the
  # words with single spaces and the callee sees the same IFS.
  local IFS=' '
  local cmdline="$*"

  echo "${cmdline}" >&2
  "$@"
}
81
#
# Succeed if $1 looks like a UUID: hex digit groups of 8-4-4-4-12
# separated by dashes, with nothing before or after.
#
is_uuid() {
  local -r hex='[[:xdigit:]]'
  local -r pattern="^${hex}{8}-${hex}{4}-${hex}{4}-${hex}{4}-${hex}{12}"'$'

  [[ $1 =~ $pattern ]]
}
86
#
# Compare the used size of the OSD with the new device
# (and arbitrarily 20% larger 'cos we don't want to fill it up)
#
# Globals:  osd, bluestore_device (read only)
# Outputs:  a "declare -p" dump of the two byte counts on stdout
# Exits (via error) if the OSD isn't listed by "ceph osd df", the units
# aren't recognised, the device size can't be found, or the device is
# too small.
#
function check-device-size
{
  # Keep the working variables out of the script's global namespace
  local sz units pow osdbytes bdev bdevbytes

  # "ceph osd df" fields 7 and 8 - "RAW USE", size and units
  IFS=' ' read -r sz units <<< "$(ceph osd df | awk -v"id=${osd}" '$1==id { print $7, $8 }')"
  [[ $sz && $units ]] || error "OSD ${osd} not found in 'ceph osd df' output"
  case $units in
    B) pow=0 ;;
    KiB) pow=1 ;;
    MiB) pow=2 ;;
    GiB) pow=3 ;;
    TiB) pow=4 ;;
    PiB) pow=5 ;;
    *) error "ceph df: units not recognized: ${units}" ;;
  esac
  # bc does the (fractional) arithmetic, printf rounds to whole bytes
  osdbytes=$(printf '%.0f' "$(bc <<< "${sz} * 1024^${pow} * 1.2")")

  # Resolve the by-id symlink and keep only the final path component,
  # which is the name used under /sys/block
  bdev=$(realpath "${bluestore_device}")
  bdev=${bdev##*/}
  [[ -e /sys/block/${bdev}/size ]] || error "Can't find size for ${bluestore_device}"
  # The sysfs figure is a count of 512-byte sectors
  bdevbytes=$(($(<"/sys/block/${bdev}/size") * 512))

  declare -p osdbytes bdevbytes

  ((bdevbytes >= osdbytes)) || error "The block device isn't large enough"
}
114
#
# Print a horizontal rule to visually separate sections of output.
# Only defined here if nothing else has already provided an hr function.
#
if ! declare -F hr > /dev/null
then
  function hr
  {
    printf '%s\n' '----------------------------------------------------------------------'
  }
fi

#
# Check things look ok
#
# Is there a better way of checking, other than manually?
#
# Interactive: shows "ceph -s" until the operator is happy, requests a
# scrub of the OSD, tails the OSD log until <Enter> is pressed, then
# shows "ceph -s" again until the operator is happy.
#
# Globals: osd (read only)
#
function check-ceph-ok
{
  local ans=r
  local pid

  while [[ $ans = r ]]
  do
    runcmd ceph -s
    read -r -p $'\nCheck status above and press r to recheck or <Enter> to continue with scrub' ans
  done

  #
  # Run a scrub "to be sure, to be sure"
  #
  # For smaller OSDs we can see which PGs we need to watch for...
  #
  runcmd ceph pg ls-by-primary "${osd}" | awk '$1~/^[0-9]+\./ { print $1 }'
  runcmd ceph osd scrub "${osd}"

  hr
  # Follow new log lines only (-n0) in the background while we wait
  tail -n0 -f "/var/log/ceph/ceph-osd.${osd}.log" &
  pid=$!
  sleep 2
  # Re-issue the prompt every 10 seconds so it doesn't get lost
  # amongst the log output
  while ! read -r -t 10 -p $'\n\n\ntailing osd log file: press <Enter> to continue\n\n\n' ans
  do
    :
  done
  # The tail may have exited by itself already (e.g. log file missing);
  # under errexit a failing kill must not abort the migration here
  kill "${pid}" 2> /dev/null || :
  hr

  ans=r
  while [[ $ans = r ]]
  do
    runcmd ceph -s
    read -r -p $'\nCheck status above and press r to recheck or <Enter> to continue' ans
  done
}
156
#
# Disable the FileStore so it doesn't attempt to come back on reboot, but
# so we can revert back to it if necessary
#
# https://en.wikipedia.org/wiki/GUID_Partition_Table#Partition_type_GUIDs
# Partition GUID code: 4FBD7E29-9D25-41B8-AFD0-062C0CEFF05D (Ceph OSD)
# Partition GUID code: 0FC63DAF-8483-4772-8E79-3D69D8477DE4 (Linux filesystem data)
#
# Globals: osddir, osd_json, filestore_device (read), part_guid (written)
#
function disable-filestore
{
  local -r ceph_osd_type=4FBD7E29-9D25-41B8-AFD0-062C0CEFF05D
  local -r linux_data_type=0FC63DAF-8483-4772-8E79-3D69D8477DE4
  local -r fstab_backup=/etc/fstab.${0##*/}
  local -r part_backup=${osd_json%.json}.part

  #
  # Comment out the original device in fstab if it's there
  # (it may be in here for xfs with logdev etc.)
  # A copy of the untouched fstab is kept, taken only once.
  #
  if grep -qE '^[^#[:space:]]+[[:space:]]+'"${osddir}"'[[:space:]]' /etc/fstab
  then
    if [[ ! -e $fstab_backup ]]
    then
      cp -a /etc/fstab "${fstab_backup}"
    fi
    sed -ri '/^[^#[:space:]]+[[:space:]]+'"${osddir//\//\\\/}"'[[:space:]]/ s/^/# /' /etc/fstab
  fi

  #
  # Back up the partition table (once), then change the type of
  # partition 1 if it's currently marked as a Ceph OSD
  #
  if [[ ! -e $part_backup ]]
  then
    runcmd sgdisk --backup="${part_backup}" "${filestore_device}"
  fi
  part_guid=$(sgdisk -i1 "${filestore_device}" | sed -rn 's/^Partition GUID code: ([[:xdigit:]-]+) .*/\1/p')
  if [[ $part_guid = "${ceph_osd_type}" ]]
  then
    runcmd sgdisk --typecode=1:"${linux_data_type}" "${filestore_device}"
    echo "${filestore_device} partition 1 changed to type ${linux_data_type} (Linux filesystem data)"
  fi
}
189
######################################################################
# Processing...
#

#
# Make sure every command listed in cmds is available
#
for cmd in "${cmds[@]}"
do
  if ! type "${cmd}" >& /dev/null
  then
    error "${cmd} utility required"
  fi
done

#
# Get/check OSD: its systemd unit must be enabled, and its data
# directory, fsid and ceph-volume json file must all be present
#
unit=ceph-osd@${osd}
if ! runcmd systemctl is-enabled "${unit}"
then
  error "systemd unit ${unit} not enabled"
fi

osddir=/var/lib/ceph/osd/ceph-${osd}
[[ -d $osddir ]] || error "No directory: ${osddir}"

fsid=$(< "${osddir}/fsid")
if ! is_uuid "${fsid}"
then
  error "fsid uuid not found in ${osddir}/fsid"
fi

osd_json=/etc/ceph/osd/${osd}-${fsid}.json
[[ -f $osd_json ]] || error "File doesn't exist: ${osd_json}"

# The block.db LV is named after the (original) fsid
lvnewdb=${vgdb}/osd-db-${fsid}

# Everything after "key =" on the keyring's key line
authkey=$(sed -rn 's/^[[:space:]]+key[[:space:]]*=[[:space:]]*//p' "${osddir}/keyring")
[[ $authkey ]] || error "Can't get authkey from ${osddir}/keyring"

#
# We want the device containing the FileStore version of the OSD
# so we can disable it once the BlueStore version is up and running,
# so the FileStore doesn't contend with the BlueStore on reboot etc.
#
# Only /dev/sdX style devices are accepted; a trailing partition
# number is stripped to get the whole-disk device.
#
filestore_device=$(awk -v dir="${osddir}" '$2==dir { print $1; }' /etc/mtab)
[[ $filestore_device ]] || error "Can't find device currently mounted on ${osddir}"
[[ $filestore_device =~ ^/dev/sd[a-z]+[0-9]*$ ]] || error "Don't recognize device currently mounted on ${osddir}: ${filestore_device}"
filestore_device=${filestore_device%%+([0-9])}

# Show what we've worked out
declare -p unit block_prefix bluestore_device osd osddir fsid osd_json lvnewdb authkey filestore_device

runcmd check-device-size
235
#
# Create raw LV for block.db
#
runcmd lvcreate --yes -L "${dblvsize}" -n "${lvnewdb#*/}" "${lvnewdb%/*}"


#
# Prepare the new OSD
# The list of OSD ids is captured before and after so we can work out
# which osd was created.
#
# The lists are kept in variables rather than in predictably-named files
# in /tmp, and are sorted in the C locale because comm requires sorted
# input: "ceph osd ls" prints ids in numeric order (..., 9, 10, ...)
# which is not sorted as far as comm is concerned.
#
osds_before=$(ceph osd ls | LC_ALL=C sort)
runcmd ceph-volume lvm prepare --data "${bluestore_device}" --block.db "${lvnewdb}"

#
# Work out which OSD has been created
# Is there a better way of doing this?
#
osds_after=$(ceph osd ls | LC_ALL=C sort)

# comm -13: only the lines unique to the second ("after") list
new=$(LC_ALL=C comm -13 <(printf '%s\n' "${osds_before}") <(printf '%s\n' "${osds_after}"))
[[ $new =~ ^[0-9]+$ ]] || error "New OSD id not found"
257
#
# Drop the new OSD from the ceph database again
# (its directory is left mounted)
#
runcmd ceph osd purge "${new}" --yes-i-really-mean-it

#
# Params for the newly created OSD:
# its data directory and its block LV as "vg/lv" (the block symlink
# target with the leading /dev/ removed)
#
newdir=/var/lib/ceph/osd/ceph-${new}
lvnew=$(readlink "${newdir}/block")
lvnew=${lvnew#/dev/}

#
# lvfix is what we're going to rename the LV to so
# it ends in the (original) fsid
#
if ! is_uuid "${lvnew#*/${block_prefix}-}"
then
  error "LV not recognised: ${lvnew}"
fi
lvfix=${lvnew%%/*}/${block_prefix}-${fsid}

declare -p new newdir lvnew lvfix

#
# the "dup" step only works if the destination has the same id and fsid
# as the source: fix 'em up
#
# The same tag swaps are applied to both the block LV and the block.db LV.
# NOTE(review): lvnew/lvfix have had /dev/ stripped here - confirm this
# matches the form of the existing ceph.block_device tag value.
#
new_fsid=$(< "${newdir}/fsid")
args=(
  --deltag "ceph.osd_id=${new}"         --addtag "ceph.osd_id=${osd}"
  --deltag "ceph.osd_fsid=${new_fsid}"  --addtag "ceph.osd_fsid=${fsid}"
  --deltag "ceph.block_device=${lvnew}" --addtag "ceph.block_device=${lvfix}"
)
runcmd lvchange "${args[@]}" "${lvnew}"
runcmd lvchange "${args[@]}" "${lvnewdb}"

# ...and the same identity in the bluestore labels and the fsid file
runcmd ceph-bluestore-tool set-label-key --dev "${newdir}/block" --key whoami --value "${osd}"
runcmd ceph-bluestore-tool set-label-key --dev "${newdir}/block" --key osd_uuid --value "${fsid}"
runcmd ceph-bluestore-tool set-label-key --dev "${newdir}/block.db" --key osd_uuid --value "${fsid}"

echo "${fsid}" > "${newdir}/fsid"

#
# Rename the LV so it ends in the (original) fsid,
# and repoint the block symlink at the new name
#
runcmd lvrename "${lvnew}" "${lvfix}"
runcmd ln -sf "/dev/${lvfix}" "${newdir}/block"
lvnew=$lvfix

#
# Remove the flags that mkfs has already been done - otherwise mkfs skips the actual mkfs!
#
runcmd ceph-bluestore-tool rm-label-key --dev "${newdir}/block" --key mkfs_done
runcmd rm "${newdir}/mkfs_done"

#
# Empty out the new OSD filesystem
#
runcmd ceph-objectstore-tool --type bluestore --data-path "${newdir}" --fsid "${fsid}" --op mkfs --no-mon-config
320
#
# Stop the osd - the copy can't proceed if it's busy
#
if runcmd systemctl is-active --quiet "ceph-osd@${osd}"
then
  runcmd systemctl stop "ceph-osd@${osd}"
fi

#
# The actual copy...
#
# "time" is a shell keyword, so it has to prefix the command line itself.
# Passed as an argument to runcmd it would instead be looked up as an
# external time(1) program, and if that isn't installed the script would
# abort here with the OSD already stopped.
#
time runcmd ceph-objectstore-tool --type filestore --data-path "/var/lib/ceph/osd/ceph-${osd}" --target-data-path "${newdir}" --op dup
331
#
# Fix up some keys from the copy
#
# NOTE(review): these two ceph-bluestore-tool calls don't go through
# runcmd, presumably so the auth key isn't echoed - confirm.
#
printf '[osd.%d]\n\tkey = %s\n' "${osd}" "${authkey}" > "${newdir}/key"
ceph-bluestore-tool set-label-key --dev "${newdir}/block" --key osd_key --value "${authkey}"
ceph-bluestore-tool rm-label-key --dev "${newdir}/block" --key fsid

#
# Move the FileStore config file out of the way to avoid it being used on boot
#
runcmd mv "${osd_json}" "${osd_json}.orig"

#
# prepare the mount points:
# unmount both the old and the new OSD directories and remove the
# (now empty) directory of the temporary OSD id
#
runcmd umount "${osddir}"
runcmd umount "${newdir}"
runcmd rmdir "${newdir}"

#
# Start the new BlueStore version of the OSD
#
runcmd ceph-volume lvm trigger "${osd}-${fsid}"

#
# Let things settle a little then check the new OSD is running;
# if it isn't, show the unit's status and give up
#
sleep 5
if ! systemctl is-active --quiet "${unit}"
then
  systemctl status "${unit}"
  exit 1
fi

# Operator-driven health check and scrub
runcmd check-ceph-ok

# All good: stop the FileStore from coming back on reboot
runcmd disable-filestore

exit 0