/* Source: fs/afs/vlocation.c from mirror_ubuntu-bionic-kernel.git (gitweb export) */
1 /* AFS volume location management
2 *
3 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12 #include <linux/kernel.h>
13 #include <linux/module.h>
14 #include <linux/slab.h>
15 #include <linux/init.h>
16 #include <linux/sched.h>
17 #include "internal.h"
18
/* How long (seconds) a dead record lingers in the graveyard before the
 * reaper destroys it; zeroed by afs_vlocation_purge() on rmmod. */
static unsigned afs_vlocation_timeout = 10;	/* volume location timeout in seconds */
/* Interval (seconds) between background revalidations of a record. */
static unsigned afs_vlocation_update_timeout = 10 * 60;

static void afs_vlocation_reaper(struct work_struct *);
static void afs_vlocation_updater(struct work_struct *);

/* Records queued for periodic revalidation, ordered by ->update_at. */
static LIST_HEAD(afs_vlocation_updates);
/* Unused records awaiting destruction, ordered most-dead-first. */
static LIST_HEAD(afs_vlocation_graveyard);
static DEFINE_SPINLOCK(afs_vlocation_updates_lock);
static DEFINE_SPINLOCK(afs_vlocation_graveyard_lock);
static DECLARE_DELAYED_WORK(afs_vlocation_reap, afs_vlocation_reaper);
static DECLARE_DELAYED_WORK(afs_vlocation_update, afs_vlocation_updater);
/* Single-threaded workqueue on which the updater work item runs. */
static struct workqueue_struct *afs_vlocation_update_worker;
32
/*
 * iterate through the VL servers in a cell until one of them admits knowing
 * about the volume in question
 * - on success, *vldb holds the server's reply
 * - returns 0 or a negative error; -ENOMEDIUM if no server knows the volume
 *   (also the result if the cell has no server addresses at all)
 */
static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl,
					   struct key *key,
					   struct afs_cache_vlocation *vldb)
{
	struct afs_cell *cell = vl->cell;
	struct in_addr addr;
	int count, ret;

	_enter("%s,%s", cell->name, vl->vldb.name);

	/* the write semaphore serialises use of the server rotation cursor
	 * (vl_curr_svix) across all lookups in this cell */
	down_write(&vl->cell->vl_sem);
	ret = -ENOMEDIUM;
	for (count = cell->vl_naddrs; count > 0; count--) {
		addr = cell->vl_addrs[cell->vl_curr_svix];

		_debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr);

		/* attempt to access the VL server */
		ret = afs_vl_get_entry_by_name(&addr, key, vl->vldb.name, vldb,
					       &afs_sync_call);
		switch (ret) {
		case 0:
			goto out;
		case -ENOMEM:
		case -ENONET:
		case -ENETUNREACH:
		case -EHOSTUNREACH:
		case -ECONNREFUSED:
			/* local resource shortage is fatal; the other network
			 * errors just mean this particular server couldn't be
			 * reached, so try the next one */
			if (ret == -ENOMEM || ret == -ENONET)
				goto out;
			goto rotate;
		case -ENOMEDIUM:
		case -EKEYREJECTED:
		case -EKEYEXPIRED:
			/* authoritative refusal - no point asking the other
			 * servers in the cell */
			goto out;
		default:
			/* anything unrecognised is squashed to -EIO but we
			 * still rotate on to the next server */
			ret = -EIO;
			goto rotate;
		}

		/* rotate the server records upon lookup failure */
	rotate:
		cell->vl_curr_svix++;
		cell->vl_curr_svix %= cell->vl_naddrs;
	}

out:
	up_write(&vl->cell->vl_sem);
	_leave(" = %d", ret);
	return ret;
}
88
/*
 * iterate through the VL servers in a cell until one of them admits knowing
 * about the volume in question
 * - like the by-name variant, but queries by volume ID and type, and also
 *   maintains the update retry counters (upd_busy_cnt, upd_rej_cnt)
 * - if every attempt fails and at least one server positively rejected the
 *   volume, the record is marked invalid and -ENOMEDIUM returned
 */
static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl,
					 struct key *key,
					 afs_volid_t volid,
					 afs_voltype_t voltype,
					 struct afs_cache_vlocation *vldb)
{
	struct afs_cell *cell = vl->cell;
	struct in_addr addr;
	int count, ret;

	_enter("%s,%x,%d,", cell->name, volid, voltype);

	/* serialise use of the server rotation cursor (vl_curr_svix) */
	down_write(&vl->cell->vl_sem);
	ret = -ENOMEDIUM;
	for (count = cell->vl_naddrs; count > 0; count--) {
		addr = cell->vl_addrs[cell->vl_curr_svix];

		_debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr);

		/* attempt to access the VL server */
		ret = afs_vl_get_entry_by_id(&addr, key, volid, voltype, vldb,
					     &afs_sync_call);
		switch (ret) {
		case 0:
			goto out;
		case -ENOMEM:
		case -ENONET:
		case -ENETUNREACH:
		case -EHOSTUNREACH:
		case -ECONNREFUSED:
			/* local resource shortage aborts; other network errors
			 * just move us on to the next server */
			if (ret == -ENOMEM || ret == -ENONET)
				goto out;
			goto rotate;
		case -EBUSY:
			/* retry the same server up to 3 times, sleeping a
			 * jiffy before the second and third attempts */
			vl->upd_busy_cnt++;
			if (vl->upd_busy_cnt <= 3) {
				if (vl->upd_busy_cnt > 1) {
					/* second+ BUSY - sleep a little bit */
					set_current_state(TASK_UNINTERRUPTIBLE);
					schedule_timeout(1);
				}
				continue;
			}
			/* too many EBUSYs: break out of the switch and fall
			 * through the rotate label to the next server */
			break;
		case -ENOMEDIUM:
			/* this server positively denies the volume exists */
			vl->upd_rej_cnt++;
			goto rotate;
		default:
			ret = -EIO;
			goto rotate;
		}

		/* rotate the server records upon lookup failure */
	rotate:
		cell->vl_curr_svix++;
		cell->vl_curr_svix %= cell->vl_naddrs;
		vl->upd_busy_cnt = 0;
	}

out:
	/* if any server said the volume is gone, treat the record as no
	 * longer valid rather than reporting a transient error */
	if (ret < 0 && vl->upd_rej_cnt > 0) {
		printk(KERN_NOTICE "kAFS:"
		       " Active volume no longer valid '%s'\n",
		       vl->vldb.name);
		vl->valid = 0;
		ret = -ENOMEDIUM;
	}

	up_write(&vl->cell->vl_sem);
	_leave(" = %d", ret);
	return ret;
}
165
166 /*
167 * allocate a volume location record
168 */
169 static struct afs_vlocation *afs_vlocation_alloc(struct afs_cell *cell,
170 const char *name,
171 size_t namesz)
172 {
173 struct afs_vlocation *vl;
174
175 vl = kzalloc(sizeof(struct afs_vlocation), GFP_KERNEL);
176 if (vl) {
177 vl->cell = cell;
178 vl->state = AFS_VL_NEW;
179 atomic_set(&vl->usage, 1);
180 INIT_LIST_HEAD(&vl->link);
181 INIT_LIST_HEAD(&vl->grave);
182 INIT_LIST_HEAD(&vl->update);
183 init_waitqueue_head(&vl->waitq);
184 spin_lock_init(&vl->lock);
185 memcpy(vl->vldb.name, name, namesz);
186 }
187
188 _leave(" = %p", vl);
189 return vl;
190 }
191
/*
 * update record if we found it in the cache
 * - picks a volume ID out of the cached record (preferring RW, then RO, then
 *   backup) and asks the VL servers about it by ID
 * - returns 0 with *vldb filled in, or a negative error
 */
static int afs_vlocation_update_record(struct afs_vlocation *vl,
				       struct key *key,
				       struct afs_cache_vlocation *vldb)
{
	afs_voltype_t voltype;
	afs_volid_t vid;
	int ret;

	/* try to look up a cached volume in the cell VL databases by ID */
	_debug("Locally Cached: %s %02x { %08x(%x) %08x(%x) %08x(%x) }",
	       vl->vldb.name,
	       vl->vldb.vidmask,
	       ntohl(vl->vldb.servers[0].s_addr),
	       vl->vldb.srvtmask[0],
	       ntohl(vl->vldb.servers[1].s_addr),
	       vl->vldb.srvtmask[1],
	       ntohl(vl->vldb.servers[2].s_addr),
	       vl->vldb.srvtmask[2]);

	_debug("Vids: %08x %08x %08x",
	       vl->vldb.vid[0],
	       vl->vldb.vid[1],
	       vl->vldb.vid[2]);

	/* select the best volume ID to query by: RW > RO > backup */
	if (vl->vldb.vidmask & AFS_VOL_VTM_RW) {
		vid = vl->vldb.vid[0];
		voltype = AFSVL_RWVOL;
	} else if (vl->vldb.vidmask & AFS_VOL_VTM_RO) {
		vid = vl->vldb.vid[1];
		voltype = AFSVL_ROVOL;
	} else if (vl->vldb.vidmask & AFS_VOL_VTM_BAK) {
		vid = vl->vldb.vid[2];
		voltype = AFSVL_BACKVOL;
	} else {
		/* a cached record must have at least one type bit set; the
		 * assignments below are unreachable but silence the compiler */
		BUG();
		vid = 0;
		voltype = 0;
	}

	/* contact the server to make sure the volume is still available
	 * - TODO: need to handle disconnected operation here
	 */
	ret = afs_vlocation_access_vl_by_id(vl, key, vid, voltype, vldb);
	switch (ret) {
		/* net error */
	default:
		printk(KERN_WARNING "kAFS:"
		       " failed to update volume '%s' (%x) up in '%s': %d\n",
		       vl->vldb.name, vid, vl->cell->name, ret);
		_leave(" = %d", ret);
		return ret;

		/* pulled from local cache into memory */
	case 0:
		_leave(" = 0");
		return 0;

		/* uh oh... looks like the volume got deleted */
	case -ENOMEDIUM:
		printk(KERN_ERR "kAFS:"
		       " volume '%s' (%x) does not exist '%s'\n",
		       vl->vldb.name, vid, vl->cell->name);

		/* TODO: make existing record unavailable */
		_leave(" = %d", ret);
		return ret;
	}
}
263
/*
 * apply the update to a VL record
 * - copies the freshly-fetched *vldb over the record's cached copy and
 *   pokes the fscache cookie so the on-disk cache follows suit
 */
static void afs_vlocation_apply_update(struct afs_vlocation *vl,
				       struct afs_cache_vlocation *vldb)
{
	_debug("Done VL Lookup: %s %02x { %08x(%x) %08x(%x) %08x(%x) }",
	       vldb->name, vldb->vidmask,
	       ntohl(vldb->servers[0].s_addr), vldb->srvtmask[0],
	       ntohl(vldb->servers[1].s_addr), vldb->srvtmask[1],
	       ntohl(vldb->servers[2].s_addr), vldb->srvtmask[2]);

	_debug("Vids: %08x %08x %08x",
	       vldb->vid[0], vldb->vid[1], vldb->vid[2]);

	/* a rename on the server is tolerated, but worth telling the admin */
	if (strcmp(vldb->name, vl->vldb.name) != 0)
		printk(KERN_NOTICE "kAFS:"
		       " name of volume '%s' changed to '%s' on server\n",
		       vl->vldb.name, vldb->name);

	vl->vldb = *vldb;

#ifdef CONFIG_AFS_FSCACHE
	fscache_update_cookie(vl->cache);
#endif
}
290
291 /*
292 * fill in a volume location record, consulting the cache and the VL server
293 * both
294 */
295 static int afs_vlocation_fill_in_record(struct afs_vlocation *vl,
296 struct key *key)
297 {
298 struct afs_cache_vlocation vldb;
299 int ret;
300
301 _enter("");
302
303 ASSERTCMP(vl->valid, ==, 0);
304
305 memset(&vldb, 0, sizeof(vldb));
306
307 /* see if we have an in-cache copy (will set vl->valid if there is) */
308 #ifdef CONFIG_AFS_FSCACHE
309 vl->cache = fscache_acquire_cookie(vl->cell->cache,
310 &afs_vlocation_cache_index_def, vl,
311 true);
312 #endif
313
314 if (vl->valid) {
315 /* try to update a known volume in the cell VL databases by
316 * ID as the name may have changed */
317 _debug("found in cache");
318 ret = afs_vlocation_update_record(vl, key, &vldb);
319 } else {
320 /* try to look up an unknown volume in the cell VL databases by
321 * name */
322 ret = afs_vlocation_access_vl_by_name(vl, key, &vldb);
323 if (ret < 0) {
324 printk("kAFS: failed to locate '%s' in cell '%s'\n",
325 vl->vldb.name, vl->cell->name);
326 return ret;
327 }
328 }
329
330 afs_vlocation_apply_update(vl, &vldb);
331 _leave(" = 0");
332 return 0;
333 }
334
335 /*
336 * queue a vlocation record for updates
337 */
338 static void afs_vlocation_queue_for_updates(struct afs_vlocation *vl)
339 {
340 struct afs_vlocation *xvl;
341
342 /* wait at least 10 minutes before updating... */
343 vl->update_at = get_seconds() + afs_vlocation_update_timeout;
344
345 spin_lock(&afs_vlocation_updates_lock);
346
347 if (!list_empty(&afs_vlocation_updates)) {
348 /* ... but wait at least 1 second more than the newest record
349 * already queued so that we don't spam the VL server suddenly
350 * with lots of requests
351 */
352 xvl = list_entry(afs_vlocation_updates.prev,
353 struct afs_vlocation, update);
354 if (vl->update_at <= xvl->update_at)
355 vl->update_at = xvl->update_at + 1;
356 } else {
357 queue_delayed_work(afs_vlocation_update_worker,
358 &afs_vlocation_update,
359 afs_vlocation_update_timeout * HZ);
360 }
361
362 list_add_tail(&vl->update, &afs_vlocation_updates);
363 spin_unlock(&afs_vlocation_updates_lock);
364 }
365
/*
 * lookup volume location
 * - iterate through the VL servers in a cell until one of them admits knowing
 *   about the volume in question
 * - lookup in the local cache if not able to find on the VL server
 * - insert/update in the local cache if did get a VL response
 * - returns the record with a usage reference held, or ERR_PTR() on failure;
 *   the caller drops the reference with afs_put_vlocation()
 */
struct afs_vlocation *afs_vlocation_lookup(struct afs_cell *cell,
					   struct key *key,
					   const char *name,
					   size_t namesz)
{
	struct afs_vlocation *vl;
	int ret;

	_enter("{%s},{%x},%*.*s,%zu",
	       cell->name, key_serial(key),
	       (int) namesz, (int) namesz, name, namesz);

	/* the name must fit in vldb.name with room for a NUL terminator */
	if (namesz >= sizeof(vl->vldb.name)) {
		_leave(" = -ENAMETOOLONG");
		return ERR_PTR(-ENAMETOOLONG);
	}

	/* see if we have an in-memory copy first */
	down_write(&cell->vl_sem);
	spin_lock(&cell->vl_lock);
	list_for_each_entry(vl, &cell->vl_list, link) {
		/* quick length check: the stored name is NUL-terminated, so a
		 * same-length name has '\0' at index namesz */
		if (vl->vldb.name[namesz] != '\0')
			continue;
		if (memcmp(vl->vldb.name, name, namesz) == 0)
			goto found_in_memory;
	}
	spin_unlock(&cell->vl_lock);

	/* not in the cell's in-memory lists - create a new record */
	vl = afs_vlocation_alloc(cell, name, namesz);
	if (!vl) {
		up_write(&cell->vl_sem);
		return ERR_PTR(-ENOMEM);
	}

	/* the record holds a reference on its cell */
	afs_get_cell(cell);

	list_add_tail(&vl->link, &cell->vl_list);
	vl->state = AFS_VL_CREATING;
	up_write(&cell->vl_sem);

fill_in_record:
	ret = afs_vlocation_fill_in_record(vl, key);
	if (ret < 0)
		goto error_abandon;
	spin_lock(&vl->lock);
	vl->state = AFS_VL_VALID;
	spin_unlock(&vl->lock);
	/* release any other lookups waiting on this record's state */
	wake_up(&vl->waitq);

	/* update volume entry in local cache */
#ifdef CONFIG_AFS_FSCACHE
	fscache_update_cookie(vl->cache);
#endif

	/* schedule for regular updates */
	afs_vlocation_queue_for_updates(vl);
	goto success;

found_in_memory:
	/* found in memory */
	_debug("found in memory");
	atomic_inc(&vl->usage);
	spin_unlock(&cell->vl_lock);
	/* having taken a new reference, rescue the record from the reaper's
	 * graveyard if it was queued there */
	if (!list_empty(&vl->grave)) {
		spin_lock(&afs_vlocation_graveyard_lock);
		list_del_init(&vl->grave);
		spin_unlock(&afs_vlocation_graveyard_lock);
	}
	up_write(&cell->vl_sem);

	/* see if it was an abandoned record that we might try filling in */
	spin_lock(&vl->lock);
	while (vl->state != AFS_VL_VALID) {
		afs_vlocation_state_t state = vl->state;

		_debug("invalid [state %d]", state);

		/* an abandoned or volume-less record is ours to retry */
		if (state == AFS_VL_NEW || state == AFS_VL_NO_VOLUME) {
			vl->state = AFS_VL_CREATING;
			spin_unlock(&vl->lock);
			goto fill_in_record;
		}

		/* must now wait for creation or update by someone else to
		 * complete */
		_debug("wait");

		spin_unlock(&vl->lock);
		ret = wait_event_interruptible(vl->waitq,
					       vl->state == AFS_VL_NEW ||
					       vl->state == AFS_VL_VALID ||
					       vl->state == AFS_VL_NO_VOLUME);
		if (ret < 0)
			goto error;
		/* retake the lock and re-examine the state, as it may have
		 * moved on again while we slept */
		spin_lock(&vl->lock);
	}
	spin_unlock(&vl->lock);

success:
	_leave(" = %p", vl);
	return vl;

error_abandon:
	/* reset to NEW so another lookup may retry the fill-in, and wake any
	 * waiters so they can see the state change */
	spin_lock(&vl->lock);
	vl->state = AFS_VL_NEW;
	spin_unlock(&vl->lock);
	wake_up(&vl->waitq);
error:
	ASSERT(vl != NULL);
	afs_put_vlocation(vl);
	_leave(" = %d", ret);
	return ERR_PTR(ret);
}
487
/*
 * finish using a volume location record
 * - drops the caller's reference; when the last reference goes, the record
 *   is moved to the graveyard for deferred destruction by the reaper and
 *   removed from the update queue
 */
void afs_put_vlocation(struct afs_vlocation *vl)
{
	if (!vl)
		return;

	_enter("%s", vl->vldb.name);

	ASSERTCMP(atomic_read(&vl->usage), >, 0);

	if (likely(!atomic_dec_and_test(&vl->usage))) {
		_leave("");
		return;
	}

	spin_lock(&afs_vlocation_graveyard_lock);
	/* recheck under the lock: a concurrent lookup may have resurrected
	 * the record between our decrement and here */
	if (atomic_read(&vl->usage) == 0) {
		_debug("buried");
		list_move_tail(&vl->grave, &afs_vlocation_graveyard);
		vl->time_of_death = get_seconds();
		queue_delayed_work(afs_wq, &afs_vlocation_reap,
				   afs_vlocation_timeout * HZ);

		/* suspend updates on this record */
		if (!list_empty(&vl->update)) {
			spin_lock(&afs_vlocation_updates_lock);
			list_del_init(&vl->update);
			spin_unlock(&afs_vlocation_updates_lock);
		}
	}
	spin_unlock(&afs_vlocation_graveyard_lock);
	_leave(" [killed?]");
}
523
/*
 * destroy a dead volume location record
 * - called by the reaper once the record has left all lists; relinquishes
 *   the fscache cookie and the reference on the cell before freeing
 */
static void afs_vlocation_destroy(struct afs_vlocation *vl)
{
	_enter("%p", vl);

#ifdef CONFIG_AFS_FSCACHE
	fscache_relinquish_cookie(vl->cache, 0);
#endif
	afs_put_cell(vl->cell);
	kfree(vl);
}
537
/*
 * reap dead volume location records
 * - destroys graveyard entries whose timeout has expired; entries still in
 *   the grace period cause the work item to be rescheduled for later
 */
static void afs_vlocation_reaper(struct work_struct *work)
{
	LIST_HEAD(corpses);
	struct afs_vlocation *vl;
	unsigned long delay, expiry;
	time_t now;

	_enter("");

	now = get_seconds();
	spin_lock(&afs_vlocation_graveyard_lock);

	while (!list_empty(&afs_vlocation_graveyard)) {
		vl = list_entry(afs_vlocation_graveyard.next,
				struct afs_vlocation, grave);

		_debug("check %p", vl);

		/* the queue is ordered most dead first */
		expiry = vl->time_of_death + afs_vlocation_timeout;
		if (expiry > now) {
			/* first entry not yet expired - everything after it is
			 * newer still, so come back when it falls due */
			delay = (expiry - now) * HZ;
			_debug("delay %lu", delay);
			mod_delayed_work(afs_wq, &afs_vlocation_reap, delay);
			break;
		}

		/* only reap if no one re-referenced the record in the
		 * meantime; collect corpses on a local list so destruction
		 * can happen outside the locks */
		spin_lock(&vl->cell->vl_lock);
		if (atomic_read(&vl->usage) > 0) {
			_debug("no reap");
			list_del_init(&vl->grave);
		} else {
			_debug("reap");
			list_move_tail(&vl->grave, &corpses);
			list_del_init(&vl->link);
		}
		spin_unlock(&vl->cell->vl_lock);
	}

	spin_unlock(&afs_vlocation_graveyard_lock);

	/* now reap the corpses we've extracted */
	while (!list_empty(&corpses)) {
		vl = list_entry(corpses.next, struct afs_vlocation, grave);
		list_del(&vl->grave);
		afs_vlocation_destroy(vl);
	}

	_leave("");
}
591
592 /*
593 * initialise the VL update process
594 */
595 int __init afs_vlocation_update_init(void)
596 {
597 afs_vlocation_update_worker =
598 create_singlethread_workqueue("kafs_vlupdated");
599 return afs_vlocation_update_worker ? 0 : -ENOMEM;
600 }
601
/*
 * discard all the volume location records for rmmod
 * - abandons the update queue, flushes the updater workqueue, then forces
 *   an immediate reap of the graveyard
 */
void afs_vlocation_purge(void)
{
	/* make the reaper destroy graveyard entries immediately */
	afs_vlocation_timeout = 0;

	/* reinitialise the queue head, detaching any remaining entries so no
	 * further updates are attempted */
	spin_lock(&afs_vlocation_updates_lock);
	list_del_init(&afs_vlocation_updates);
	spin_unlock(&afs_vlocation_updates_lock);
	/* run the updater one last time (it will find nothing), then tear
	 * down its workqueue */
	mod_delayed_work(afs_vlocation_update_worker, &afs_vlocation_update, 0);
	destroy_workqueue(afs_vlocation_update_worker);

	/* reap everything left in the graveyard right away */
	mod_delayed_work(afs_wq, &afs_vlocation_reap, 0);
}
617
/*
 * update a volume location
 * - work function: revalidates the record at the head of the update queue
 *   against the VL servers, then requeues it (and itself) for the next pass
 */
static void afs_vlocation_updater(struct work_struct *work)
{
	struct afs_cache_vlocation vldb;
	struct afs_vlocation *vl, *xvl;
	time_t now;
	long timeout;
	int ret;

	_enter("");

	now = get_seconds();

	/* find a record to update */
	spin_lock(&afs_vlocation_updates_lock);
	for (;;) {
		if (list_empty(&afs_vlocation_updates)) {
			spin_unlock(&afs_vlocation_updates_lock);
			_leave(" [nothing]");
			return;
		}

		/* skip and unqueue records that have been dropped in the
		 * meantime (usage fell to 0) */
		vl = list_entry(afs_vlocation_updates.next,
				struct afs_vlocation, update);
		if (atomic_read(&vl->usage) > 0)
			break;
		list_del_init(&vl->update);
	}

	/* if the head record isn't due yet, just reschedule ourselves for
	 * when it will be */
	timeout = vl->update_at - now;
	if (timeout > 0) {
		queue_delayed_work(afs_vlocation_update_worker,
				   &afs_vlocation_update, timeout * HZ);
		spin_unlock(&afs_vlocation_updates_lock);
		_leave(" [nothing]");
		return;
	}

	/* take the record off the queue and pin it with a reference while we
	 * work on it outside the lock */
	list_del_init(&vl->update);
	atomic_inc(&vl->usage);
	spin_unlock(&afs_vlocation_updates_lock);

	/* we can now perform the update */
	_debug("update %s", vl->vldb.name);
	vl->state = AFS_VL_UPDATING;
	vl->upd_rej_cnt = 0;
	vl->upd_busy_cnt = 0;

	/* NULL key: background updates run without caller credentials */
	ret = afs_vlocation_update_record(vl, NULL, &vldb);
	spin_lock(&vl->lock);
	switch (ret) {
	case 0:
		afs_vlocation_apply_update(vl, &vldb);
		vl->state = AFS_VL_VALID;
		break;
	case -ENOMEDIUM:
		vl->state = AFS_VL_VOLUME_DELETED;
		break;
	default:
		vl->state = AFS_VL_UNCERTAIN;
		break;
	}
	spin_unlock(&vl->lock);
	/* release anyone waiting on the state change */
	wake_up(&vl->waitq);

	/* and then reschedule */
	_debug("reschedule");
	vl->update_at = get_seconds() + afs_vlocation_update_timeout;

	spin_lock(&afs_vlocation_updates_lock);

	if (!list_empty(&afs_vlocation_updates)) {
		/* next update in 10 minutes, but wait at least 1 second more
		 * than the newest record already queued so that we don't spam
		 * the VL server suddenly with lots of requests
		 */
		xvl = list_entry(afs_vlocation_updates.prev,
				 struct afs_vlocation, update);
		if (vl->update_at <= xvl->update_at)
			vl->update_at = xvl->update_at + 1;
		/* reschedule ourselves for the (possibly overdue) head of the
		 * queue */
		xvl = list_entry(afs_vlocation_updates.next,
				 struct afs_vlocation, update);
		timeout = xvl->update_at - now;
		if (timeout < 0)
			timeout = 0;
	} else {
		timeout = afs_vlocation_update_timeout;
	}

	ASSERT(list_empty(&vl->update));

	list_add_tail(&vl->update, &afs_vlocation_updates);

	_debug("timeout %ld", timeout);
	queue_delayed_work(afs_vlocation_update_worker,
			   &afs_vlocation_update, timeout * HZ);
	spin_unlock(&afs_vlocation_updates_lock);
	/* drop the reference we took for the duration of the update */
	afs_put_vlocation(vl);
}