]> git.proxmox.com Git - ceph.git/blob - ceph/src/pybind/mgr/cephadm/migrations.py
version: 15.2.5 (octopus)
[ceph.git] / ceph / src / pybind / mgr / cephadm / migrations.py
1 import logging
2 from typing import TYPE_CHECKING, Iterator
3
4 from ceph.deployment.service_spec import PlacementSpec, ServiceSpec, HostPlacementSpec
5 from cephadm.schedule import HostAssignment
6
7 from orchestrator import OrchestratorError
8
9 if TYPE_CHECKING:
10 from .module import CephadmOrchestrator
11
12 LAST_MIGRATION = 2
13
14 logger = logging.getLogger(__name__)
15
16
17 class Migrations:
18 def __init__(self, mgr: "CephadmOrchestrator"):
19 self.mgr = mgr
20
21 # Why having a global counter, instead of spec versions?
22 #
23 # for the first migration:
24 # The specs don't change in (this) migration. but the scheduler here.
25 # Adding the version to the specs at this time just felt wrong to me.
26 #
27 # And the specs are only another part of cephadm which needs potential upgrades.
28 # We have the cache, the inventory, the config store, the upgrade (imagine changing the
29 # upgrade code, while an old upgrade is still in progress), naming of daemons,
30 # fs-layout of the daemons, etc.
31 if self.mgr.migration_current is None:
32 self.set(0)
33
34 # for some migrations, we don't need to do anything except for
35 # setting migration_current = 1.
36 # let's try to shortcut things here.
37 self.migrate()
38
39 def set(self, val):
40 self.mgr.set_module_option('migration_current', val)
41 self.mgr.migration_current = val
42
43 def is_migration_ongoing(self):
44 return self.mgr.migration_current != LAST_MIGRATION
45
46 def verify_no_migration(self):
47 if self.is_migration_ongoing():
48 # this is raised in module.serve()
49 raise OrchestratorError(
50 "cephadm migration still ongoing. Please wait, until the migration is complete.")
51
52 def migrate(self):
53 if self.mgr.migration_current == 0:
54 if self.migrate_0_1():
55 self.set(1)
56
57 if self.mgr.migration_current == 1:
58 if self.migrate_1_2():
59 self.set(2)
60
61 def migrate_0_1(self) -> bool:
62 """
63 Migration 0 -> 1
64 New scheduler that takes PlacementSpec as the bound and not as recommendation.
65 I.e. the new scheduler won't suggest any new placements outside of the hosts
66 specified by label etc.
67
68 Which means, we have to make sure, we're not removing any daemons directly after
69 upgrading to the new scheduler.
70
71 There is a potential race here:
72 1. user updates his spec to remove daemons
73 2. mgr gets upgraded to new scheduler, before the old scheduler removed the daemon
74 3. now, we're converting the spec to explicit placement, thus reverting (1.)
75 I think this is ok.
76 """
77
78 def interesting_specs() -> Iterator[ServiceSpec]:
79 for s in self.mgr.spec_store.specs.values():
80 if s.unmanaged:
81 continue
82 p = s.placement
83 if p is None:
84 continue
85 if p.count is None:
86 continue
87 if not p.hosts and not p.host_pattern and not p.label:
88 continue
89 yield s
90
91 def convert_to_explicit(spec: ServiceSpec) -> None:
92 placements = HostAssignment(
93 spec=spec,
94 get_hosts_func=self.mgr._get_hosts,
95 get_daemons_func=self.mgr.cache.get_daemons_by_service
96 ).place()
97
98 existing_daemons = self.mgr.cache.get_daemons_by_service(spec.service_name())
99
100 # We have to migrate, only if the new scheduler would remove daemons
101 if len(placements) >= len(existing_daemons):
102 return
103
104 old_hosts = {h.hostname: h for h in spec.placement.hosts}
105 new_hosts = [
106 old_hosts[d.hostname] if d.hostname in old_hosts else HostPlacementSpec(
107 hostname=d.hostname, network='', name='')
108 for d in existing_daemons
109 ]
110
111 new_placement = PlacementSpec(
112 hosts=new_hosts,
113 count=spec.placement.count
114 )
115
116 new_spec = ServiceSpec.from_json(spec.to_json())
117 new_spec.placement = new_placement
118
119 logger.info(f"Migrating {spec.one_line_str()} to explicit placement")
120
121 self.mgr.spec_store.save(new_spec)
122
123 specs = list(interesting_specs())
124 if not specs:
125 return True # nothing to do. shortcut
126
127 if not self.mgr.cache.daemon_cache_filled():
128 logger.info("Unable to migrate yet. Daemon Cache still incomplete.")
129 return False
130
131 for spec in specs:
132 convert_to_explicit(spec)
133
134 return True
135
136 def migrate_1_2(self) -> bool:
137 """
138 After 15.2.4, we unified some service IDs: MONs, MGRs etc no longer have a service id.
139 Which means, the service names changed:
140
141 mon.foo -> mon
142 mgr.foo -> mgr
143
144 This fixes the data structure consistency
145 """
146 bad_specs = {}
147 for name, spec in self.mgr.spec_store.specs.items():
148 if name != spec.service_name():
149 bad_specs[name] = (spec.service_name(), spec)
150
151 for old, (new, old_spec) in bad_specs.items():
152 if new not in self.mgr.spec_store.specs:
153 spec = old_spec
154 else:
155 spec = self.mgr.spec_store.specs[new]
156 spec.unmanaged = True
157 self.mgr.spec_store.save(spec)
158 self.mgr.spec_store.rm(old)
159
160 return True