]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - fs/afs/volume.c
afs: Overhaul volume and server record caching and fileserver rotation
[mirror_ubuntu-bionic-kernel.git] / fs / afs / volume.c
CommitLineData
ec26815a 1/* AFS volume management
1da177e4 2 *
08e0e7c8 3 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
1da177e4
LT
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/kernel.h>
1da177e4 13#include <linux/slab.h>
1da177e4
LT
14#include "internal.h"
15
d2ddc776
DH
16unsigned __read_mostly afs_volume_gc_delay = 10;
17unsigned __read_mostly afs_volume_record_life = 60 * 60;
18
19static const char *const afs_voltypes[] = { "R/W", "R/O", "BAK" };
1da177e4 20
1da177e4 21/*
d2ddc776
DH
22 * Allocate a volume record and load it up from a vldb record.
23 */
24static struct afs_volume *afs_alloc_volume(struct afs_mount_params *params,
25 struct afs_vldb_entry *vldb,
26 unsigned long type_mask)
27{
28 struct afs_server_list *slist;
29 struct afs_server *server;
30 struct afs_volume *volume;
31 int ret = -ENOMEM, nr_servers = 0, i, j;
32
33 for (i = 0; i < vldb->nr_servers; i++)
34 if (vldb->fs_mask[i] & type_mask)
35 nr_servers++;
36
37 volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL);
38 if (!volume)
39 goto error_0;
40
41 volume->vid = vldb->vid[params->type];
42 volume->update_at = ktime_get_real_seconds() + afs_volume_record_life;
43 volume->cell = afs_get_cell(params->cell);
44 volume->type = params->type;
45 volume->type_force = params->force;
46 volume->name_len = vldb->name_len;
47
48 atomic_set(&volume->usage, 1);
49 INIT_LIST_HEAD(&volume->proc_link);
50 rwlock_init(&volume->servers_lock);
51 memcpy(volume->name, vldb->name, vldb->name_len + 1);
52
53 slist = afs_alloc_server_list(params->cell, params->key, vldb, type_mask);
54 if (IS_ERR(slist)) {
55 ret = PTR_ERR(slist);
56 goto error_1;
57 }
58
59 refcount_set(&slist->usage, 1);
60 volume->servers = slist;
61
62 /* Make sure a records exists for each server this volume occupies. */
63 for (i = 0; i < nr_servers; i++) {
64 if (!(vldb->fs_mask[i] & type_mask))
65 continue;
66
67 server = afs_lookup_server(params->cell, params->key,
68 &vldb->fs_server[i]);
69 if (IS_ERR(server)) {
70 ret = PTR_ERR(server);
71 if (ret == -ENOENT)
72 continue;
73 goto error_2;
74 }
75
76 /* Insertion-sort by server pointer */
77 for (j = 0; j < slist->nr_servers; j++)
78 if (slist->servers[j].server >= server)
79 break;
80 if (j < slist->nr_servers) {
81 if (slist->servers[j].server == server) {
82 afs_put_server(params->net, server);
83 continue;
84 }
85
86 memmove(slist->servers + j + 1,
87 slist->servers + j,
88 (slist->nr_servers - j) * sizeof(struct afs_server_entry));
89 }
90
91 slist->servers[j].server = server;
92 slist->nr_servers++;
93 }
94
95 if (slist->nr_servers == 0) {
96 ret = -EDESTADDRREQ;
97 goto error_2;
98 }
99
100 return volume;
101
102error_2:
103 afs_put_serverlist(params->net, slist);
104error_1:
105 kfree(volume);
106error_0:
107 return ERR_PTR(ret);
108}
109
110/*
111 * Look up a VLDB record for a volume.
112 */
113static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell,
114 struct key *key,
115 const char *volname,
116 size_t volnamesz)
117{
118 struct afs_addr_cursor ac;
119 struct afs_vldb_entry *vldb;
120 int ret;
121
122 ret = afs_set_vl_cursor(&ac, cell);
123 if (ret < 0)
124 return ERR_PTR(ret);
125
126 while (afs_iterate_addresses(&ac)) {
127 vldb = afs_vl_get_entry_by_name_u(cell->net, &ac, key,
128 volname, volnamesz);
129 switch (ac.error) {
130 case 0:
131 afs_end_cursor(&ac);
132 return vldb;
133 case -ECONNABORTED:
134 ac.error = afs_abort_to_error(ac.abort_code);
135 goto error;
136 case -ENOMEM:
137 case -ENONET:
138 goto error;
139 case -ENETUNREACH:
140 case -EHOSTUNREACH:
141 case -ECONNREFUSED:
142 break;
143 default:
144 ac.error = -EIO;
145 goto error;
146 }
147 }
148
149error:
150 return ERR_PTR(afs_end_cursor(&ac));
151}
152
153/*
154 * Look up a volume in the VL server and create a candidate volume record for
155 * it.
156 *
157 * The volume name can be one of the following:
1da177e4
LT
158 * "%[cell:]volume[.]" R/W volume
159 * "#[cell:]volume[.]" R/O or R/W volume (rwparent=0),
160 * or R/W (rwparent=1) volume
161 * "%[cell:]volume.readonly" R/O volume
162 * "#[cell:]volume.readonly" R/O volume
163 * "%[cell:]volume.backup" Backup volume
164 * "#[cell:]volume.backup" Backup volume
165 *
166 * The cell name is optional, and defaults to the current cell.
167 *
168 * See "The Rules of Mount Point Traversal" in Chapter 5 of the AFS SysAdmin
169 * Guide
170 * - Rule 1: Explicit type suffix forces access of that type or nothing
171 * (no suffix, then use Rule 2 & 3)
172 * - Rule 2: If parent volume is R/O, then mount R/O volume by preference, R/W
173 * if not available
174 * - Rule 3: If parent volume is R/W, then only mount R/W volume unless
175 * explicitly told otherwise
176 */
d2ddc776 177struct afs_volume *afs_create_volume(struct afs_mount_params *params)
1da177e4 178{
d2ddc776
DH
179 struct afs_vldb_entry *vldb;
180 struct afs_volume *volume;
181 unsigned long type_mask = 1UL << params->type;
1da177e4 182
d2ddc776
DH
183 vldb = afs_vl_lookup_vldb(params->cell, params->key,
184 params->volname, params->volnamesz);
185 if (IS_ERR(vldb))
186 return ERR_CAST(vldb);
1da177e4 187
d2ddc776
DH
188 if (test_bit(AFS_VLDB_QUERY_ERROR, &vldb->flags)) {
189 volume = ERR_PTR(vldb->error);
190 goto error;
191 }
1da177e4 192
d2ddc776
DH
193 /* Make the final decision on the type we want */
194 volume = ERR_PTR(-ENOMEDIUM);
00d3b7a4 195 if (params->force) {
d2ddc776 196 if (!(vldb->flags & type_mask))
1da177e4 197 goto error;
d2ddc776 198 } else if (test_bit(AFS_VLDB_HAS_RO, &vldb->flags)) {
00d3b7a4 199 params->type = AFSVL_ROVOL;
d2ddc776 200 } else if (test_bit(AFS_VLDB_HAS_RW, &vldb->flags)) {
00d3b7a4 201 params->type = AFSVL_RWVOL;
ec26815a 202 } else {
1da177e4
LT
203 goto error;
204 }
205
d2ddc776
DH
206 type_mask = 1UL << params->type;
207 volume = afs_alloc_volume(params, vldb, type_mask);
1da177e4 208
d2ddc776
DH
209error:
210 kfree(vldb);
211 return volume;
212}
1da177e4 213
d2ddc776
DH
214/*
215 * Destroy a volume record
216 */
217static void afs_destroy_volume(struct afs_net *net, struct afs_volume *volume)
218{
219 _enter("%p", volume);
1da177e4 220
d2ddc776
DH
221#ifdef CONFIG_AFS_FSCACHE
222 ASSERTCMP(volume->cache, ==, NULL);
223#endif
1da177e4 224
d2ddc776
DH
225 afs_put_serverlist(net, volume->servers);
226 afs_put_cell(net, volume->cell);
227 kfree(volume);
1da177e4 228
d2ddc776
DH
229 _leave(" [destroyed]");
230}
231
232/*
233 * Drop a reference on a volume record.
234 */
235void afs_put_volume(struct afs_cell *cell, struct afs_volume *volume)
236{
237 if (volume) {
238 _enter("%s", volume->name);
239
240 if (atomic_dec_and_test(&volume->usage))
241 afs_destroy_volume(cell->net, volume);
1da177e4 242 }
d2ddc776 243}
1da177e4 244
d2ddc776
DH
245/*
246 * Activate a volume.
247 */
248void afs_activate_volume(struct afs_volume *volume)
249{
9b3f26c9 250#ifdef CONFIG_AFS_FSCACHE
ad6a942a 251 volume->cache = fscache_acquire_cookie(volume->cell->cache,
9b3f26c9 252 &afs_volume_cache_index_def,
94d30ae9 253 volume, true);
1da177e4 254#endif
1da177e4 255
d2ddc776
DH
256 write_lock(&volume->cell->proc_lock);
257 list_add_tail(&volume->proc_link, &volume->cell->proc_volumes);
258 write_unlock(&volume->cell->proc_lock);
259}
1da177e4 260
d2ddc776
DH
261/*
262 * Deactivate a volume.
263 */
264void afs_deactivate_volume(struct afs_volume *volume)
265{
266 _enter("%s", volume->name);
1da177e4 267
d2ddc776
DH
268 write_lock(&volume->cell->proc_lock);
269 list_del_init(&volume->proc_link);
270 write_unlock(&volume->cell->proc_lock);
1da177e4 271
d2ddc776
DH
272#ifdef CONFIG_AFS_FSCACHE
273 fscache_relinquish_cookie(volume->cache,
274 test_bit(AFS_VOLUME_DELETED, &volume->flags));
275 volume->cache = NULL;
276#endif
1da177e4 277
d2ddc776 278 _leave("");
ec26815a 279}
1da177e4 280
1da177e4 281/*
d2ddc776 282 * Query the VL service to update the volume status.
1da177e4 283 */
d2ddc776 284static int afs_update_volume_status(struct afs_volume *volume, struct key *key)
1da177e4 285{
d2ddc776
DH
286 struct afs_server_list *new, *old, *discard;
287 struct afs_vldb_entry *vldb;
288 char idbuf[16];
289 int ret, idsz;
1da177e4 290
d2ddc776 291 _enter("");
1da177e4 292
d2ddc776
DH
293 /* We look up an ID by passing it as a decimal string in the
294 * operation's name parameter.
295 */
296 idsz = sprintf(idbuf, "%u", volume->vid);
1da177e4 297
d2ddc776
DH
298 vldb = afs_vl_lookup_vldb(volume->cell, key, idbuf, idsz);
299 if (IS_ERR(vldb)) {
300 ret = PTR_ERR(vldb);
301 goto error;
302 }
1da177e4 303
d2ddc776
DH
304 /* See if the volume got renamed. */
305 if (vldb->name_len != volume->name_len ||
306 memcmp(vldb->name, volume->name, vldb->name_len) != 0) {
307 /* TODO: Use RCU'd string. */
308 memcpy(volume->name, vldb->name, AFS_MAXVOLNAME);
309 volume->name_len = vldb->name_len;
310 }
311
312 /* See if the volume's server list got updated. */
313 new = afs_alloc_server_list(volume->cell, key,
314 vldb, (1 << volume->type));
315 if (IS_ERR(new)) {
316 ret = PTR_ERR(new);
317 goto error_vldb;
318 }
1da177e4 319
d2ddc776 320 write_lock(&volume->servers_lock);
1da177e4 321
d2ddc776
DH
322 discard = new;
323 old = volume->servers;
324 if (afs_annotate_server_list(new, old)) {
325 new->seq = volume->servers_seq + 1;
326 volume->servers = new;
327 smp_wmb();
328 volume->servers_seq++;
329 discard = old;
1da177e4
LT
330 }
331
d2ddc776
DH
332 volume->update_at = ktime_get_real_seconds() + afs_volume_record_life;
333 clear_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags);
334 write_unlock(&volume->servers_lock);
335 ret = 0;
1da177e4 336
d2ddc776
DH
337 afs_put_serverlist(volume->cell->net, discard);
338error_vldb:
339 kfree(vldb);
340error:
341 _leave(" = %d", ret);
342 return ret;
343}
1da177e4 344
d2ddc776
DH
345/*
346 * Make sure the volume record is up to date.
347 */
348int afs_check_volume_status(struct afs_volume *volume, struct key *key)
349{
350 time64_t now = ktime_get_real_seconds();
351 int ret, retries = 0;
1da177e4 352
d2ddc776
DH
353 _enter("");
354
355 if (volume->update_at <= now)
356 set_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags);
357
358retry:
359 if (!test_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags) &&
360 !test_bit(AFS_VOLUME_WAIT, &volume->flags)) {
361 _leave(" = 0");
362 return 0;
c435ee34 363 }
1da177e4 364
d2ddc776
DH
365 if (!test_and_set_bit_lock(AFS_VOLUME_UPDATING, &volume->flags)) {
366 ret = afs_update_volume_status(volume, key);
367 clear_bit_unlock(AFS_VOLUME_WAIT, &volume->flags);
368 clear_bit_unlock(AFS_VOLUME_UPDATING, &volume->flags);
369 wake_up_bit(&volume->flags, AFS_VOLUME_WAIT);
370 _leave(" = %d", ret);
371 return ret;
372 }
1da177e4 373
d2ddc776
DH
374 if (!test_bit(AFS_VOLUME_WAIT, &volume->flags)) {
375 _leave(" = 0 [no wait]");
376 return 0;
377 }
378
379 ret = wait_on_bit(&volume->flags, AFS_VOLUME_WAIT, TASK_INTERRUPTIBLE);
380 if (ret == -ERESTARTSYS) {
381 _leave(" = %d", ret);
382 return ret;
383 }
384
385 retries++;
386 if (retries == 4) {
387 _leave(" = -ESTALE");
388 return -ESTALE;
389 }
390 goto retry;
ec26815a 391}