]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blame - fs/xfs/xfs_qm.c
xfs: implement lazy removal for the dquot freelist
[mirror_ubuntu-zesty-kernel.git] / fs / xfs / xfs_qm.c
CommitLineData
1da177e4 1/*
4ce3121f
NS
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
1da177e4 4 *
4ce3121f
NS
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
1da177e4
LT
7 * published by the Free Software Foundation.
8 *
4ce3121f
NS
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
1da177e4 13 *
4ce3121f
NS
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1da177e4 17 */
1da177e4
LT
18#include "xfs.h"
19#include "xfs_fs.h"
a844f451 20#include "xfs_bit.h"
1da177e4 21#include "xfs_log.h"
a844f451 22#include "xfs_inum.h"
1da177e4
LT
23#include "xfs_trans.h"
24#include "xfs_sb.h"
25#include "xfs_ag.h"
1da177e4 26#include "xfs_alloc.h"
1da177e4
LT
27#include "xfs_quota.h"
28#include "xfs_mount.h"
1da177e4
LT
29#include "xfs_bmap_btree.h"
30#include "xfs_ialloc_btree.h"
1da177e4
LT
31#include "xfs_dinode.h"
32#include "xfs_inode.h"
a844f451
NS
33#include "xfs_ialloc.h"
34#include "xfs_itable.h"
1da177e4
LT
35#include "xfs_rtalloc.h"
36#include "xfs_error.h"
a844f451 37#include "xfs_bmap.h"
1da177e4
LT
38#include "xfs_attr.h"
39#include "xfs_buf_item.h"
40#include "xfs_trans_space.h"
41#include "xfs_utils.h"
1da177e4 42#include "xfs_qm.h"
0b1b213f 43#include "xfs_trace.h"
1da177e4
LT
44
45/*
46 * The global quota manager. There is only one of these for the entire
47 * system, _not_ one per file system. XQM keeps track of the overall
48 * quota functionality, including maintaining the freelist and hash
49 * tables of dquots.
50 */
a0b0b8a5 51struct mutex xfs_Gqm_lock;
1da177e4 52struct xfs_qm *xfs_Gqm;
6b3f6b5b 53uint ndquot;
1da177e4
LT
54
55kmem_zone_t *qm_dqzone;
56kmem_zone_t *qm_dqtrxzone;
1da177e4
LT
57
58STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int);
59STATIC void xfs_qm_list_destroy(xfs_dqlist_t *);
60
61STATIC int xfs_qm_init_quotainos(xfs_mount_t *);
ba0f32d4 62STATIC int xfs_qm_init_quotainfo(xfs_mount_t *);
1495f230 63STATIC int xfs_qm_shake(struct shrinker *, struct shrink_control *);
1da177e4 64
8e1f936b
RR
65static struct shrinker xfs_qm_shaker = {
66 .shrink = xfs_qm_shake,
67 .seeks = DEFAULT_SEEKS,
68};
69
1da177e4
LT
70/*
71 * Initialize the XQM structure.
72 * Note that there is not one quota manager per file system.
73 */
74STATIC struct xfs_qm *
75xfs_Gqm_init(void)
76{
6b3f6b5b
NS
77 xfs_dqhash_t *udqhash, *gdqhash;
78 xfs_qm_t *xqm;
215101c3
NS
79 size_t hsize;
80 uint i;
1da177e4
LT
81
82 /*
83 * Initialize the dquot hash tables.
84 */
77e4635a 85 udqhash = kmem_zalloc_greedy(&hsize,
5995cb7d 86 XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t),
bdfb0430
CH
87 XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t));
88 if (!udqhash)
89 goto out;
90
91 gdqhash = kmem_zalloc_large(hsize);
d67b1b03 92 if (!gdqhash)
bdfb0430
CH
93 goto out_free_udqhash;
94
77e4635a 95 hsize /= sizeof(xfs_dqhash_t);
6b3f6b5b 96 ndquot = hsize << 8;
1da177e4 97
6b3f6b5b
NS
98 xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP);
99 xqm->qm_dqhashmask = hsize - 1;
100 xqm->qm_usr_dqhtable = udqhash;
101 xqm->qm_grp_dqhtable = gdqhash;
1da177e4
LT
102 ASSERT(xqm->qm_usr_dqhtable != NULL);
103 ASSERT(xqm->qm_grp_dqhtable != NULL);
104
105 for (i = 0; i < hsize; i++) {
106 xfs_qm_list_init(&(xqm->qm_usr_dqhtable[i]), "uxdqh", i);
107 xfs_qm_list_init(&(xqm->qm_grp_dqhtable[i]), "gxdqh", i);
108 }
109
110 /*
111 * Freelist of all dquots of all file systems
112 */
3a8406f6
DC
113 INIT_LIST_HEAD(&xqm->qm_dqfrlist);
114 xqm->qm_dqfrlist_cnt = 0;
115 mutex_init(&xqm->qm_dqfrlist_lock);
1da177e4
LT
116
117 /*
118 * dquot zone. we register our own low-memory callback.
119 */
120 if (!qm_dqzone) {
121 xqm->qm_dqzone = kmem_zone_init(sizeof(xfs_dquot_t),
122 "xfs_dquots");
123 qm_dqzone = xqm->qm_dqzone;
124 } else
125 xqm->qm_dqzone = qm_dqzone;
126
8e1f936b 127 register_shrinker(&xfs_qm_shaker);
1da177e4
LT
128
129 /*
130 * The t_dqinfo portion of transactions.
131 */
132 if (!qm_dqtrxzone) {
133 xqm->qm_dqtrxzone = kmem_zone_init(sizeof(xfs_dquot_acct_t),
134 "xfs_dqtrx");
135 qm_dqtrxzone = xqm->qm_dqtrxzone;
136 } else
137 xqm->qm_dqtrxzone = qm_dqtrxzone;
138
139 atomic_set(&xqm->qm_totaldquots, 0);
140 xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO;
141 xqm->qm_nrefs = 0;
1da177e4 142 return xqm;
bdfb0430
CH
143
144 out_free_udqhash:
145 kmem_free_large(udqhash);
146 out:
147 return NULL;
1da177e4
LT
148}
149
150/*
151 * Destroy the global quota manager when its reference count goes to zero.
152 */
ba0f32d4 153STATIC void
1da177e4
LT
154xfs_qm_destroy(
155 struct xfs_qm *xqm)
156{
157 int hsize, i;
158
159 ASSERT(xqm != NULL);
160 ASSERT(xqm->qm_nrefs == 0);
80a376bf 161
8e1f936b 162 unregister_shrinker(&xfs_qm_shaker);
80a376bf
CH
163
164 mutex_lock(&xqm->qm_dqfrlist_lock);
165 ASSERT(list_empty(&xqm->qm_dqfrlist));
166 mutex_unlock(&xqm->qm_dqfrlist_lock);
167
1da177e4
LT
168 hsize = xqm->qm_dqhashmask + 1;
169 for (i = 0; i < hsize; i++) {
170 xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
171 xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i]));
172 }
bdfb0430
CH
173 kmem_free_large(xqm->qm_usr_dqhtable);
174 kmem_free_large(xqm->qm_grp_dqhtable);
1da177e4
LT
175 xqm->qm_usr_dqhtable = NULL;
176 xqm->qm_grp_dqhtable = NULL;
177 xqm->qm_dqhashmask = 0;
3a8406f6 178
f0e2d93c 179 kmem_free(xqm);
1da177e4
LT
180}
181
182/*
183 * Called at mount time to let XQM know that another file system is
184 * starting quotas. This isn't crucial information as the individual mount
185 * structures are pretty independent, but it helps the XQM keep a
186 * global view of what's going on.
187 */
188/* ARGSUSED */
189STATIC int
190xfs_qm_hold_quotafs_ref(
191 struct xfs_mount *mp)
192{
193 /*
194 * Need to lock the xfs_Gqm structure for things like this. For example,
195 * the structure could disappear between the entry to this routine and
196 * a HOLD operation if not locked.
197 */
e2494582 198 mutex_lock(&xfs_Gqm_lock);
1da177e4 199
bdfb0430 200 if (!xfs_Gqm) {
1da177e4 201 xfs_Gqm = xfs_Gqm_init();
38e712ab
JL
202 if (!xfs_Gqm) {
203 mutex_unlock(&xfs_Gqm_lock);
bdfb0430 204 return ENOMEM;
38e712ab 205 }
bdfb0430
CH
206 }
207
1da177e4
LT
208 /*
209 * We can keep a list of all filesystems with quotas mounted for
210 * debugging and statistical purposes, but ...
211 * Just take a reference and get out.
212 */
e2494582
CH
213 xfs_Gqm->qm_nrefs++;
214 mutex_unlock(&xfs_Gqm_lock);
1da177e4
LT
215
216 return 0;
217}
218
219
220/*
221 * Release the reference that a filesystem took at mount time,
222 * so that we know when we need to destroy the entire quota manager.
223 */
224/* ARGSUSED */
225STATIC void
226xfs_qm_rele_quotafs_ref(
227 struct xfs_mount *mp)
228{
1da177e4
LT
229 ASSERT(xfs_Gqm);
230 ASSERT(xfs_Gqm->qm_nrefs > 0);
231
1da177e4
LT
232 /*
233 * Destroy the entire XQM. If somebody mounts with quotaon, this'll
234 * be restarted.
235 */
e2494582
CH
236 mutex_lock(&xfs_Gqm_lock);
237 if (--xfs_Gqm->qm_nrefs == 0) {
1da177e4
LT
238 xfs_qm_destroy(xfs_Gqm);
239 xfs_Gqm = NULL;
240 }
e2494582 241 mutex_unlock(&xfs_Gqm_lock);
1da177e4
LT
242}
243
1da177e4
LT
244/*
245 * Just destroy the quotainfo structure.
246 */
247void
7d095257
CH
248xfs_qm_unmount(
249 struct xfs_mount *mp)
1da177e4 250{
7d095257 251 if (mp->m_quotainfo) {
8112e9dc 252 xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL);
1da177e4 253 xfs_qm_destroy_quotainfo(mp);
7d095257 254 }
1da177e4
LT
255}
256
257
258/*
259 * This is called from xfs_mountfs to start quotas and initialize all
260 * necessary data structures like quotainfo. This is also responsible for
261 * running a quotacheck as necessary. We are guaranteed that the superblock
262 * is consistently read in at this point.
53aa7915
DC
263 *
264 * If we fail here, the mount will continue with quota turned off. We don't
265 * need to inidicate success or failure at all.
1da177e4 266 */
53aa7915 267void
1da177e4 268xfs_qm_mount_quotas(
4249023a 269 xfs_mount_t *mp)
1da177e4 270{
1da177e4
LT
271 int error = 0;
272 uint sbf;
273
1da177e4
LT
274 /*
275 * If quotas on realtime volumes is not supported, we disable
276 * quotas immediately.
277 */
278 if (mp->m_sb.sb_rextents) {
0b932ccc 279 xfs_notice(mp, "Cannot turn on quotas for realtime filesystem");
1da177e4
LT
280 mp->m_qflags = 0;
281 goto write_changes;
282 }
283
1da177e4 284 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
155ffd07 285
1da177e4
LT
286 /*
287 * Allocate the quotainfo structure inside the mount struct, and
288 * create quotainode(s), and change/rev superblock if necessary.
289 */
53aa7915
DC
290 error = xfs_qm_init_quotainfo(mp);
291 if (error) {
1da177e4
LT
292 /*
293 * We must turn off quotas.
294 */
295 ASSERT(mp->m_quotainfo == NULL);
296 mp->m_qflags = 0;
297 goto write_changes;
298 }
299 /*
300 * If any of the quotas are not consistent, do a quotacheck.
301 */
4249023a 302 if (XFS_QM_NEED_QUOTACHECK(mp)) {
53aa7915
DC
303 error = xfs_qm_quotacheck(mp);
304 if (error) {
305 /* Quotacheck failed and disabled quotas. */
306 return;
1da177e4 307 }
1da177e4 308 }
646d5bda
DD
309 /*
310 * If one type of quotas is off, then it will lose its
311 * quotachecked status, since we won't be doing accounting for
312 * that type anymore.
313 */
53aa7915 314 if (!XFS_IS_UQUOTA_ON(mp))
646d5bda 315 mp->m_qflags &= ~XFS_UQUOTA_CHKD;
53aa7915 316 if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp)))
646d5bda 317 mp->m_qflags &= ~XFS_OQUOTA_CHKD;
155ffd07 318
1da177e4
LT
319 write_changes:
320 /*
3685c2a1 321 * We actually don't have to acquire the m_sb_lock at all.
1da177e4
LT
322 * This can only be called from mount, and that's single threaded. XXX
323 */
3685c2a1 324 spin_lock(&mp->m_sb_lock);
1da177e4
LT
325 sbf = mp->m_sb.sb_qflags;
326 mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL;
3685c2a1 327 spin_unlock(&mp->m_sb_lock);
1da177e4
LT
328
329 if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) {
330 if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) {
331 /*
332 * We could only have been turning quotas off.
333 * We aren't in very good shape actually because
334 * the incore structures are convinced that quotas are
335 * off, but the on disk superblock doesn't know that !
336 */
337 ASSERT(!(XFS_IS_QUOTA_RUNNING(mp)));
53487786
DC
338 xfs_alert(mp, "%s: Superblock update failed!",
339 __func__);
1da177e4
LT
340 }
341 }
342
343 if (error) {
53487786 344 xfs_warn(mp, "Failed to initialize disk quotas.");
7d095257 345 return;
1da177e4 346 }
1da177e4
LT
347}
348
349/*
350 * Called from the vfsops layer.
351 */
e57481dc 352void
1da177e4
LT
353xfs_qm_unmount_quotas(
354 xfs_mount_t *mp)
355{
1da177e4
LT
356 /*
357 * Release the dquots that root inode, et al might be holding,
358 * before we flush quotas and blow away the quotainfo structure.
359 */
360 ASSERT(mp->m_rootip);
361 xfs_qm_dqdetach(mp->m_rootip);
362 if (mp->m_rbmip)
363 xfs_qm_dqdetach(mp->m_rbmip);
364 if (mp->m_rsumip)
365 xfs_qm_dqdetach(mp->m_rsumip);
366
367 /*
e57481dc 368 * Release the quota inodes.
1da177e4 369 */
1da177e4 370 if (mp->m_quotainfo) {
e57481dc
CH
371 if (mp->m_quotainfo->qi_uquotaip) {
372 IRELE(mp->m_quotainfo->qi_uquotaip);
373 mp->m_quotainfo->qi_uquotaip = NULL;
1da177e4 374 }
e57481dc
CH
375 if (mp->m_quotainfo->qi_gquotaip) {
376 IRELE(mp->m_quotainfo->qi_gquotaip);
377 mp->m_quotainfo->qi_gquotaip = NULL;
1da177e4
LT
378 }
379 }
1da177e4
LT
380}
381
382/*
383 * Flush all dquots of the given file system to disk. The dquots are
384 * _not_ purged from memory here, just their data written to disk.
385 */
ba0f32d4 386STATIC int
1da177e4 387xfs_qm_dqflush_all(
a7ef9bd7 388 struct xfs_mount *mp)
1da177e4 389{
8a7b8a89
CH
390 struct xfs_quotainfo *q = mp->m_quotainfo;
391 int recl;
392 struct xfs_dquot *dqp;
8a7b8a89 393 int error;
1da177e4 394
8a7b8a89 395 if (!q)
014c2544 396 return 0;
1da177e4 397again:
8a7b8a89
CH
398 mutex_lock(&q->qi_dqlist_lock);
399 list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
1da177e4
LT
400 xfs_dqlock(dqp);
401 if (! XFS_DQ_IS_DIRTY(dqp)) {
402 xfs_dqunlock(dqp);
403 continue;
404 }
0b1b213f 405
1da177e4 406 /* XXX a sentinel would be better */
8a7b8a89 407 recl = q->qi_dqreclaims;
e1f49cf2 408 if (!xfs_dqflock_nowait(dqp)) {
1da177e4
LT
409 /*
410 * If we can't grab the flush lock then check
411 * to see if the dquot has been flushed delayed
412 * write. If so, grab its buffer and send it
413 * out immediately. We'll be able to acquire
414 * the flush lock when the I/O completes.
415 */
800b484e 416 xfs_dqflock_pushbuf_wait(dqp);
1da177e4
LT
417 }
418 /*
419 * Let go of the mplist lock. We don't want to hold it
420 * across a disk write.
421 */
8a7b8a89 422 mutex_unlock(&q->qi_dqlist_lock);
a7ef9bd7 423 error = xfs_qm_dqflush(dqp, 0);
1da177e4
LT
424 xfs_dqunlock(dqp);
425 if (error)
014c2544 426 return error;
1da177e4 427
8a7b8a89
CH
428 mutex_lock(&q->qi_dqlist_lock);
429 if (recl != q->qi_dqreclaims) {
430 mutex_unlock(&q->qi_dqlist_lock);
1da177e4
LT
431 /* XXX restart limit */
432 goto again;
433 }
434 }
435
8a7b8a89 436 mutex_unlock(&q->qi_dqlist_lock);
1da177e4 437 /* return ! busy */
014c2544 438 return 0;
1da177e4
LT
439}
440/*
441 * Release the group dquot pointers the user dquots may be
442 * carrying around as a hint. mplist is locked on entry and exit.
443 */
444STATIC void
445xfs_qm_detach_gdquots(
8a7b8a89 446 struct xfs_mount *mp)
1da177e4 447{
8a7b8a89
CH
448 struct xfs_quotainfo *q = mp->m_quotainfo;
449 struct xfs_dquot *dqp, *gdqp;
450 int nrecl;
1da177e4
LT
451
452 again:
8a7b8a89
CH
453 ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
454 list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
1da177e4
LT
455 xfs_dqlock(dqp);
456 if ((gdqp = dqp->q_gdquot)) {
457 xfs_dqlock(gdqp);
458 dqp->q_gdquot = NULL;
459 }
460 xfs_dqunlock(dqp);
461
462 if (gdqp) {
463 /*
464 * Can't hold the mplist lock across a dqput.
465 * XXXmust convert to marker based iterations here.
466 */
8a7b8a89
CH
467 nrecl = q->qi_dqreclaims;
468 mutex_unlock(&q->qi_dqlist_lock);
1da177e4
LT
469 xfs_qm_dqput(gdqp);
470
8a7b8a89
CH
471 mutex_lock(&q->qi_dqlist_lock);
472 if (nrecl != q->qi_dqreclaims)
1da177e4
LT
473 goto again;
474 }
1da177e4
LT
475 }
476}
477
478/*
479 * Go through all the incore dquots of this file system and take them
480 * off the mplist and hashlist, if the dquot type matches the dqtype
481 * parameter. This is used when turning off quota accounting for
482 * users and/or groups, as well as when the filesystem is unmounting.
483 */
484STATIC int
485xfs_qm_dqpurge_int(
8a7b8a89
CH
486 struct xfs_mount *mp,
487 uint flags)
1da177e4 488{
8a7b8a89
CH
489 struct xfs_quotainfo *q = mp->m_quotainfo;
490 struct xfs_dquot *dqp, *n;
491 uint dqtype;
492 int nrecl;
493 int nmisses;
1da177e4 494
8a7b8a89 495 if (!q)
014c2544 496 return 0;
1da177e4
LT
497
498 dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0;
c8ad20ff 499 dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0;
1da177e4
LT
500 dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0;
501
8a7b8a89 502 mutex_lock(&q->qi_dqlist_lock);
1da177e4
LT
503
504 /*
505 * In the first pass through all incore dquots of this filesystem,
506 * we release the group dquot pointers the user dquots may be
507 * carrying around as a hint. We need to do this irrespective of
508 * what's being turned off.
509 */
510 xfs_qm_detach_gdquots(mp);
511
512 again:
513 nmisses = 0;
8a7b8a89 514 ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
1da177e4
LT
515 /*
516 * Try to get rid of all of the unwanted dquots. The idea is to
517 * get them off mplist and hashlist, but leave them on freelist.
518 */
8a7b8a89 519 list_for_each_entry_safe(dqp, n, &q->qi_dqlist, q_mplist) {
be7ffc38
CH
520 xfs_dqlock(dqp);
521 if ((dqp->dq_flags & dqtype) == 0) {
522 xfs_dqunlock(dqp);
1da177e4 523 continue;
be7ffc38
CH
524 }
525 xfs_dqunlock(dqp);
1da177e4 526
c9a192dc 527 if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
8a7b8a89
CH
528 nrecl = q->qi_dqreclaims;
529 mutex_unlock(&q->qi_dqlist_lock);
c9a192dc 530 mutex_lock(&dqp->q_hash->qh_lock);
8a7b8a89 531 mutex_lock(&q->qi_dqlist_lock);
1da177e4
LT
532
533 /*
534 * XXXTheoretically, we can get into a very long
535 * ping pong game here.
536 * No one can be adding dquots to the mplist at
537 * this point, but somebody might be taking things off.
538 */
8a7b8a89 539 if (nrecl != q->qi_dqreclaims) {
c9a192dc 540 mutex_unlock(&dqp->q_hash->qh_lock);
1da177e4
LT
541 goto again;
542 }
543 }
544
545 /*
546 * Take the dquot off the mplist and hashlist. It may remain on
547 * freelist in INACTIVE state.
548 */
4f0e8a98 549 nmisses += xfs_qm_dqpurge(dqp);
1da177e4 550 }
8a7b8a89 551 mutex_unlock(&q->qi_dqlist_lock);
1da177e4
LT
552 return nmisses;
553}
554
555int
556xfs_qm_dqpurge_all(
557 xfs_mount_t *mp,
558 uint flags)
559{
560 int ndquots;
561
562 /*
563 * Purge the dquot cache.
564 * None of the dquots should really be busy at this point.
565 */
566 if (mp->m_quotainfo) {
567 while ((ndquots = xfs_qm_dqpurge_int(mp, flags))) {
568 delay(ndquots * 10);
569 }
570 }
571 return 0;
572}
573
574STATIC int
575xfs_qm_dqattach_one(
576 xfs_inode_t *ip,
577 xfs_dqid_t id,
578 uint type,
579 uint doalloc,
1da177e4
LT
580 xfs_dquot_t *udqhint, /* hint */
581 xfs_dquot_t **IO_idqpp)
582{
583 xfs_dquot_t *dqp;
584 int error;
585
579aa9ca 586 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1da177e4 587 error = 0;
8e9b6e7f 588
1da177e4
LT
589 /*
590 * See if we already have it in the inode itself. IO_idqpp is
591 * &i_udquot or &i_gdquot. This made the code look weird, but
592 * made the logic a lot simpler.
593 */
8e9b6e7f
CH
594 dqp = *IO_idqpp;
595 if (dqp) {
0b1b213f 596 trace_xfs_dqattach_found(dqp);
8e9b6e7f 597 return 0;
1da177e4
LT
598 }
599
600 /*
601 * udqhint is the i_udquot field in inode, and is non-NULL only
c8ad20ff 602 * when the type arg is group/project. Its purpose is to save a
1da177e4
LT
603 * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside
604 * the user dquot.
605 */
8e9b6e7f
CH
606 if (udqhint) {
607 ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ);
1da177e4
LT
608 xfs_dqlock(udqhint);
609
8e9b6e7f
CH
610 /*
611 * No need to take dqlock to look at the id.
612 *
613 * The ID can't change until it gets reclaimed, and it won't
614 * be reclaimed as long as we have a ref from inode and we
615 * hold the ilock.
616 */
617 dqp = udqhint->q_gdquot;
618 if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) {
619 xfs_dqlock(dqp);
620 XFS_DQHOLD(dqp);
621 ASSERT(*IO_idqpp == NULL);
622 *IO_idqpp = dqp;
623
1da177e4
LT
624 xfs_dqunlock(dqp);
625 xfs_dqunlock(udqhint);
8e9b6e7f 626 return 0;
1da177e4 627 }
8e9b6e7f
CH
628
629 /*
630 * We can't hold a dquot lock when we call the dqget code.
631 * We'll deadlock in no time, because of (not conforming to)
632 * lock ordering - the inodelock comes before any dquot lock,
633 * and we may drop and reacquire the ilock in xfs_qm_dqget().
634 */
1da177e4 635 xfs_dqunlock(udqhint);
8e9b6e7f
CH
636 }
637
1da177e4
LT
638 /*
639 * Find the dquot from somewhere. This bumps the
640 * reference count of dquot and returns it locked.
641 * This can return ENOENT if dquot didn't exist on
642 * disk and we didn't ask it to allocate;
643 * ESRCH if quotas got turned off suddenly.
644 */
db3e74b5
MH
645 error = xfs_qm_dqget(ip->i_mount, ip, id, type,
646 doalloc | XFS_QMOPT_DOWARN, &dqp);
8e9b6e7f
CH
647 if (error)
648 return error;
1da177e4 649
0b1b213f 650 trace_xfs_dqattach_get(dqp);
8e9b6e7f 651
1da177e4
LT
652 /*
653 * dqget may have dropped and re-acquired the ilock, but it guarantees
654 * that the dquot returned is the one that should go in the inode.
655 */
656 *IO_idqpp = dqp;
8e9b6e7f
CH
657 xfs_dqunlock(dqp);
658 return 0;
1da177e4
LT
659}
660
661
662/*
663 * Given a udquot and gdquot, attach a ptr to the group dquot in the
664 * udquot as a hint for future lookups. The idea sounds simple, but the
665 * execution isn't, because the udquot might have a group dquot attached
c41564b5 666 * already and getting rid of that gets us into lock ordering constraints.
1da177e4
LT
667 * The process is complicated more by the fact that the dquots may or may not
668 * be locked on entry.
669 */
670STATIC void
671xfs_qm_dqattach_grouphint(
672 xfs_dquot_t *udq,
8e9b6e7f 673 xfs_dquot_t *gdq)
1da177e4
LT
674{
675 xfs_dquot_t *tmp;
676
8e9b6e7f 677 xfs_dqlock(udq);
1da177e4
LT
678
679 if ((tmp = udq->q_gdquot)) {
680 if (tmp == gdq) {
8e9b6e7f 681 xfs_dqunlock(udq);
1da177e4
LT
682 return;
683 }
684
685 udq->q_gdquot = NULL;
686 /*
687 * We can't keep any dqlocks when calling dqrele,
688 * because the freelist lock comes before dqlocks.
689 */
690 xfs_dqunlock(udq);
1da177e4
LT
691 /*
692 * we took a hard reference once upon a time in dqget,
693 * so give it back when the udquot no longer points at it
694 * dqput() does the unlocking of the dquot.
695 */
696 xfs_qm_dqrele(tmp);
697
698 xfs_dqlock(udq);
699 xfs_dqlock(gdq);
700
701 } else {
702 ASSERT(XFS_DQ_IS_LOCKED(udq));
8e9b6e7f 703 xfs_dqlock(gdq);
1da177e4
LT
704 }
705
706 ASSERT(XFS_DQ_IS_LOCKED(udq));
707 ASSERT(XFS_DQ_IS_LOCKED(gdq));
708 /*
709 * Somebody could have attached a gdquot here,
710 * when we dropped the uqlock. If so, just do nothing.
711 */
712 if (udq->q_gdquot == NULL) {
713 XFS_DQHOLD(gdq);
714 udq->q_gdquot = gdq;
715 }
8e9b6e7f
CH
716
717 xfs_dqunlock(gdq);
718 xfs_dqunlock(udq);
1da177e4
LT
719}
720
721
722/*
c8ad20ff
NS
723 * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON
724 * into account.
1da177e4 725 * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed.
1da177e4
LT
726 * Inode may get unlocked and relocked in here, and the caller must deal with
727 * the consequences.
728 */
729int
7d095257 730xfs_qm_dqattach_locked(
1da177e4
LT
731 xfs_inode_t *ip,
732 uint flags)
733{
734 xfs_mount_t *mp = ip->i_mount;
735 uint nquotas = 0;
736 int error = 0;
737
7d095257
CH
738 if (!XFS_IS_QUOTA_RUNNING(mp) ||
739 !XFS_IS_QUOTA_ON(mp) ||
740 !XFS_NOT_DQATTACHED(mp, ip) ||
741 ip->i_ino == mp->m_sb.sb_uquotino ||
742 ip->i_ino == mp->m_sb.sb_gquotino)
014c2544 743 return 0;
1da177e4 744
7d095257 745 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1da177e4
LT
746
747 if (XFS_IS_UQUOTA_ON(mp)) {
748 error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER,
749 flags & XFS_QMOPT_DQALLOC,
1da177e4
LT
750 NULL, &ip->i_udquot);
751 if (error)
752 goto done;
753 nquotas++;
754 }
579aa9ca
CH
755
756 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
c8ad20ff
NS
757 if (XFS_IS_OQUOTA_ON(mp)) {
758 error = XFS_IS_GQUOTA_ON(mp) ?
759 xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
760 flags & XFS_QMOPT_DQALLOC,
c8ad20ff 761 ip->i_udquot, &ip->i_gdquot) :
6743099c 762 xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ,
1da177e4 763 flags & XFS_QMOPT_DQALLOC,
1da177e4
LT
764 ip->i_udquot, &ip->i_gdquot);
765 /*
766 * Don't worry about the udquot that we may have
767 * attached above. It'll get detached, if not already.
768 */
769 if (error)
770 goto done;
771 nquotas++;
772 }
773
774 /*
775 * Attach this group quota to the user quota as a hint.
776 * This WON'T, in general, result in a thrash.
777 */
778 if (nquotas == 2) {
579aa9ca 779 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1da177e4
LT
780 ASSERT(ip->i_udquot);
781 ASSERT(ip->i_gdquot);
782
783 /*
784 * We may or may not have the i_udquot locked at this point,
785 * but this check is OK since we don't depend on the i_gdquot to
786 * be accurate 100% all the time. It is just a hint, and this
787 * will succeed in general.
788 */
789 if (ip->i_udquot->q_gdquot == ip->i_gdquot)
790 goto done;
791 /*
792 * Attach i_gdquot to the gdquot hint inside the i_udquot.
793 */
8e9b6e7f 794 xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot);
1da177e4
LT
795 }
796
7d095257 797 done:
ea15ab3c
CH
798#ifdef DEBUG
799 if (!error) {
1da177e4
LT
800 if (XFS_IS_UQUOTA_ON(mp))
801 ASSERT(ip->i_udquot);
c8ad20ff 802 if (XFS_IS_OQUOTA_ON(mp))
1da177e4
LT
803 ASSERT(ip->i_gdquot);
804 }
7d095257 805 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1da177e4 806#endif
7d095257
CH
807 return error;
808}
1da177e4 809
7d095257
CH
810int
811xfs_qm_dqattach(
812 struct xfs_inode *ip,
813 uint flags)
814{
815 int error;
816
817 xfs_ilock(ip, XFS_ILOCK_EXCL);
818 error = xfs_qm_dqattach_locked(ip, flags);
819 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1da177e4 820
014c2544 821 return error;
1da177e4
LT
822}
823
824/*
825 * Release dquots (and their references) if any.
826 * The inode should be locked EXCL except when this's called by
827 * xfs_ireclaim.
828 */
829void
830xfs_qm_dqdetach(
831 xfs_inode_t *ip)
832{
833 if (!(ip->i_udquot || ip->i_gdquot))
834 return;
835
0b1b213f
CH
836 trace_xfs_dquot_dqdetach(ip);
837
1da177e4
LT
838 ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino);
839 ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino);
1da177e4
LT
840 if (ip->i_udquot) {
841 xfs_qm_dqrele(ip->i_udquot);
842 ip->i_udquot = NULL;
843 }
844 if (ip->i_gdquot) {
845 xfs_qm_dqrele(ip->i_gdquot);
846 ip->i_gdquot = NULL;
847 }
848}
849
a4edd1da
CH
850/*
851 * The hash chains and the mplist use the same xfs_dqhash structure as
852 * their list head, but we can take the mplist qh_lock and one of the
853 * hash qh_locks at the same time without any problem as they aren't
854 * related.
855 */
856static struct lock_class_key xfs_quota_mplist_class;
1da177e4
LT
857
858/*
859 * This initializes all the quota information that's kept in the
860 * mount structure
861 */
ba0f32d4 862STATIC int
1da177e4
LT
863xfs_qm_init_quotainfo(
864 xfs_mount_t *mp)
865{
866 xfs_quotainfo_t *qinf;
867 int error;
868 xfs_dquot_t *dqp;
869
870 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
871
872 /*
873 * Tell XQM that we exist as soon as possible.
874 */
875 if ((error = xfs_qm_hold_quotafs_ref(mp))) {
014c2544 876 return error;
1da177e4
LT
877 }
878
879 qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
880
881 /*
882 * See if quotainodes are setup, and if not, allocate them,
883 * and change the superblock accordingly.
884 */
885 if ((error = xfs_qm_init_quotainos(mp))) {
f0e2d93c 886 kmem_free(qinf);
1da177e4 887 mp->m_quotainfo = NULL;
014c2544 888 return error;
1da177e4
LT
889 }
890
3a25404b
DC
891 INIT_LIST_HEAD(&qinf->qi_dqlist);
892 mutex_init(&qinf->qi_dqlist_lock);
893 lockdep_set_class(&qinf->qi_dqlist_lock, &xfs_quota_mplist_class);
a4edd1da 894
1da177e4
LT
895 qinf->qi_dqreclaims = 0;
896
897 /* mutex used to serialize quotaoffs */
794ee1ba 898 mutex_init(&qinf->qi_quotaofflock);
1da177e4
LT
899
900 /* Precalc some constants */
901 qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
902 ASSERT(qinf->qi_dqchunklen);
903 qinf->qi_dqperchunk = BBTOB(qinf->qi_dqchunklen);
904 do_div(qinf->qi_dqperchunk, sizeof(xfs_dqblk_t));
905
906 mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);
907
908 /*
909 * We try to get the limits from the superuser's limits fields.
910 * This is quite hacky, but it is standard quota practice.
911 * We look at the USR dquot with id == 0 first, but if user quotas
912 * are not enabled we goto the GRP dquot with id == 0.
913 * We don't really care to keep separate default limits for user
914 * and group quotas, at least not at this point.
915 */
916 error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)0,
c8ad20ff
NS
917 XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER :
918 (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
919 XFS_DQ_PROJ),
1da177e4
LT
920 XFS_QMOPT_DQSUSER|XFS_QMOPT_DOWARN,
921 &dqp);
922 if (! error) {
923 xfs_disk_dquot_t *ddqp = &dqp->q_core;
924
925 /*
926 * The warnings and timers set the grace period given to
927 * a user or group before he or she can not perform any
928 * more writing. If it is zero, a default is used.
929 */
1149d96a
CH
930 qinf->qi_btimelimit = ddqp->d_btimer ?
931 be32_to_cpu(ddqp->d_btimer) : XFS_QM_BTIMELIMIT;
932 qinf->qi_itimelimit = ddqp->d_itimer ?
933 be32_to_cpu(ddqp->d_itimer) : XFS_QM_ITIMELIMIT;
934 qinf->qi_rtbtimelimit = ddqp->d_rtbtimer ?
935 be32_to_cpu(ddqp->d_rtbtimer) : XFS_QM_RTBTIMELIMIT;
936 qinf->qi_bwarnlimit = ddqp->d_bwarns ?
937 be16_to_cpu(ddqp->d_bwarns) : XFS_QM_BWARNLIMIT;
938 qinf->qi_iwarnlimit = ddqp->d_iwarns ?
939 be16_to_cpu(ddqp->d_iwarns) : XFS_QM_IWARNLIMIT;
940 qinf->qi_rtbwarnlimit = ddqp->d_rtbwarns ?
941 be16_to_cpu(ddqp->d_rtbwarns) : XFS_QM_RTBWARNLIMIT;
942 qinf->qi_bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit);
943 qinf->qi_bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit);
944 qinf->qi_ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit);
945 qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit);
946 qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
947 qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
1da177e4
LT
948
949 /*
950 * We sent the XFS_QMOPT_DQSUSER flag to dqget because
951 * we don't want this dquot cached. We haven't done a
952 * quotacheck yet, and quotacheck doesn't like incore dquots.
953 */
954 xfs_qm_dqdestroy(dqp);
955 } else {
956 qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
957 qinf->qi_itimelimit = XFS_QM_ITIMELIMIT;
958 qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT;
959 qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT;
960 qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT;
06d10dd9 961 qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
1da177e4
LT
962 }
963
014c2544 964 return 0;
1da177e4
LT
965}
966
967
968/*
969 * Gets called when unmounting a filesystem or when all quotas get
970 * turned off.
971 * This purges the quota inodes, destroys locks and frees itself.
972 */
973void
974xfs_qm_destroy_quotainfo(
975 xfs_mount_t *mp)
976{
977 xfs_quotainfo_t *qi;
978
979 qi = mp->m_quotainfo;
980 ASSERT(qi != NULL);
981 ASSERT(xfs_Gqm != NULL);
982
983 /*
984 * Release the reference that XQM kept, so that we know
985 * when the XQM structure should be freed. We cannot assume
986 * that xfs_Gqm is non-null after this point.
987 */
988 xfs_qm_rele_quotafs_ref(mp);
989
3a25404b
DC
990 ASSERT(list_empty(&qi->qi_dqlist));
991 mutex_destroy(&qi->qi_dqlist_lock);
1da177e4
LT
992
993 if (qi->qi_uquotaip) {
26cc0021 994 IRELE(qi->qi_uquotaip);
1da177e4
LT
995 qi->qi_uquotaip = NULL; /* paranoia */
996 }
997 if (qi->qi_gquotaip) {
26cc0021 998 IRELE(qi->qi_gquotaip);
1da177e4
LT
999 qi->qi_gquotaip = NULL;
1000 }
1001 mutex_destroy(&qi->qi_quotaofflock);
f0e2d93c 1002 kmem_free(qi);
1da177e4
LT
1003 mp->m_quotainfo = NULL;
1004}
1005
1006
1007
1008/* ------------------- PRIVATE STATIC FUNCTIONS ----------------------- */
1009
1010/* ARGSUSED */
1011STATIC void
1012xfs_qm_list_init(
1013 xfs_dqlist_t *list,
1014 char *str,
1015 int n)
1016{
794ee1ba 1017 mutex_init(&list->qh_lock);
e6a81f13 1018 INIT_LIST_HEAD(&list->qh_list);
1da177e4
LT
1019 list->qh_version = 0;
1020 list->qh_nelems = 0;
1021}
1022
1023STATIC void
1024xfs_qm_list_destroy(
1025 xfs_dqlist_t *list)
1026{
1027 mutex_destroy(&(list->qh_lock));
1028}
1029
1da177e4
LT
1030/*
1031 * Create an inode and return with a reference already taken, but unlocked
1032 * This is how we create quota inodes
1033 */
1034STATIC int
1035xfs_qm_qino_alloc(
1036 xfs_mount_t *mp,
1037 xfs_inode_t **ip,
1038 __int64_t sbfields,
1039 uint flags)
1040{
1041 xfs_trans_t *tp;
1042 int error;
1da177e4
LT
1043 int committed;
1044
061f7209 1045 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE);
1da177e4
LT
1046 if ((error = xfs_trans_reserve(tp,
1047 XFS_QM_QINOCREATE_SPACE_RES(mp),
1048 XFS_CREATE_LOG_RES(mp), 0,
1049 XFS_TRANS_PERM_LOG_RES,
1050 XFS_CREATE_LOG_COUNT))) {
1051 xfs_trans_cancel(tp, 0);
014c2544 1052 return error;
1da177e4 1053 }
1da177e4 1054
6c77b0ea
CH
1055 error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, &committed);
1056 if (error) {
1da177e4
LT
1057 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
1058 XFS_TRANS_ABORT);
014c2544 1059 return error;
1da177e4
LT
1060 }
1061
1da177e4
LT
1062 /*
1063 * Make the changes in the superblock, and log those too.
1064 * sbfields arg may contain fields other than *QUOTINO;
1065 * VERSIONNUM for example.
1066 */
3685c2a1 1067 spin_lock(&mp->m_sb_lock);
1da177e4 1068 if (flags & XFS_QMOPT_SBVERSION) {
62118709 1069 ASSERT(!xfs_sb_version_hasquota(&mp->m_sb));
1da177e4
LT
1070 ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
1071 XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) ==
1072 (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
1073 XFS_SB_GQUOTINO | XFS_SB_QFLAGS));
1074
62118709 1075 xfs_sb_version_addquota(&mp->m_sb);
1da177e4
LT
1076 mp->m_sb.sb_uquotino = NULLFSINO;
1077 mp->m_sb.sb_gquotino = NULLFSINO;
1078
1079 /* qflags will get updated _after_ quotacheck */
1080 mp->m_sb.sb_qflags = 0;
1da177e4
LT
1081 }
1082 if (flags & XFS_QMOPT_UQUOTA)
1083 mp->m_sb.sb_uquotino = (*ip)->i_ino;
1084 else
1085 mp->m_sb.sb_gquotino = (*ip)->i_ino;
3685c2a1 1086 spin_unlock(&mp->m_sb_lock);
1da177e4
LT
1087 xfs_mod_sb(tp, sbfields);
1088
1c72bf90 1089 if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) {
53487786 1090 xfs_alert(mp, "%s failed (error %d)!", __func__, error);
014c2544 1091 return error;
1da177e4 1092 }
014c2544 1093 return 0;
1da177e4
LT
1094}
1095
1096
5b139738 1097STATIC void
1da177e4
LT
1098xfs_qm_reset_dqcounts(
1099 xfs_mount_t *mp,
1100 xfs_buf_t *bp,
1101 xfs_dqid_t id,
1102 uint type)
1103{
1104 xfs_disk_dquot_t *ddq;
1105 int j;
1106
0b1b213f
CH
1107 trace_xfs_reset_dqcounts(bp, _RET_IP_);
1108
1da177e4
LT
1109 /*
1110 * Reset all counters and timers. They'll be
1111 * started afresh by xfs_qm_quotacheck.
1112 */
1113#ifdef DEBUG
1114 j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
1115 do_div(j, sizeof(xfs_dqblk_t));
8a7b8a89 1116 ASSERT(mp->m_quotainfo->qi_dqperchunk == j);
1da177e4 1117#endif
62926044 1118 ddq = bp->b_addr;
8a7b8a89 1119 for (j = 0; j < mp->m_quotainfo->qi_dqperchunk; j++) {
1da177e4
LT
1120 /*
1121 * Do a sanity check, and if needed, repair the dqblk. Don't
1122 * output any warnings because it's perfectly possible to
c41564b5 1123 * find uninitialised dquot blks. See comment in xfs_qm_dqcheck.
1da177e4 1124 */
a0fa2b67 1125 (void) xfs_qm_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR,
1da177e4 1126 "xfs_quotacheck");
1149d96a
CH
1127 ddq->d_bcount = 0;
1128 ddq->d_icount = 0;
1129 ddq->d_rtbcount = 0;
1130 ddq->d_btimer = 0;
1131 ddq->d_itimer = 0;
1132 ddq->d_rtbtimer = 0;
1133 ddq->d_bwarns = 0;
1134 ddq->d_iwarns = 0;
1135 ddq->d_rtbwarns = 0;
1da177e4
LT
1136 ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1);
1137 }
1da177e4
LT
1138}
1139
1140STATIC int
1141xfs_qm_dqiter_bufs(
1142 xfs_mount_t *mp,
1143 xfs_dqid_t firstid,
1144 xfs_fsblock_t bno,
1145 xfs_filblks_t blkcnt,
1146 uint flags)
1147{
1148 xfs_buf_t *bp;
1149 int error;
c8ad20ff 1150 int type;
1da177e4
LT
1151
1152 ASSERT(blkcnt > 0);
c8ad20ff
NS
1153 type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER :
1154 (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP);
1da177e4
LT
1155 error = 0;
1156
1157 /*
1158 * Blkcnt arg can be a very big number, and might even be
1159 * larger than the log itself. So, we have to break it up into
1160 * manageable-sized transactions.
1161 * Note that we don't start a permanent transaction here; we might
1162 * not be able to get a log reservation for the whole thing up front,
1163 * and we don't really care to either, because we just discard
1164 * everything if we were to crash in the middle of this loop.
1165 */
1166 while (blkcnt--) {
1167 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
1168 XFS_FSB_TO_DADDR(mp, bno),
8a7b8a89 1169 mp->m_quotainfo->qi_dqchunklen, 0, &bp);
1da177e4
LT
1170 if (error)
1171 break;
1172
5b139738 1173 xfs_qm_reset_dqcounts(mp, bp, firstid, type);
61551f1e
CH
1174 xfs_buf_delwri_queue(bp);
1175 xfs_buf_relse(bp);
1da177e4
LT
1176 /*
1177 * goto the next block.
1178 */
1179 bno++;
8a7b8a89 1180 firstid += mp->m_quotainfo->qi_dqperchunk;
1da177e4 1181 }
014c2544 1182 return error;
1da177e4
LT
1183}
1184
1185/*
c8ad20ff 1186 * Iterate over all allocated USR/GRP/PRJ dquots in the system, calling a
1da177e4
LT
1187 * caller supplied function for every chunk of dquots that we find.
1188 */
1189STATIC int
1190xfs_qm_dqiterate(
1191 xfs_mount_t *mp,
1192 xfs_inode_t *qip,
1193 uint flags)
1194{
1195 xfs_bmbt_irec_t *map;
1196 int i, nmaps; /* number of map entries */
1197 int error; /* return value */
1198 xfs_fileoff_t lblkno;
1199 xfs_filblks_t maxlblkcnt;
1200 xfs_dqid_t firstid;
1201 xfs_fsblock_t rablkno;
1202 xfs_filblks_t rablkcnt;
1203
1204 error = 0;
1205 /*
c41564b5 1206 * This looks racy, but we can't keep an inode lock across a
1da177e4
LT
1207 * trans_reserve. But, this gets called during quotacheck, and that
1208 * happens only at mount time which is single threaded.
1209 */
1210 if (qip->i_d.di_nblocks == 0)
014c2544 1211 return 0;
1da177e4
LT
1212
1213 map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP);
1214
1215 lblkno = 0;
1216 maxlblkcnt = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
1217 do {
1218 nmaps = XFS_DQITER_MAP_SIZE;
1219 /*
1220 * We aren't changing the inode itself. Just changing
1221 * some of its data. No new blocks are added here, and
1222 * the inode is never added to the transaction.
1223 */
1224 xfs_ilock(qip, XFS_ILOCK_SHARED);
5c8ed202
DC
1225 error = xfs_bmapi_read(qip, lblkno, maxlblkcnt - lblkno,
1226 map, &nmaps, 0);
1da177e4
LT
1227 xfs_iunlock(qip, XFS_ILOCK_SHARED);
1228 if (error)
1229 break;
1230
1231 ASSERT(nmaps <= XFS_DQITER_MAP_SIZE);
1232 for (i = 0; i < nmaps; i++) {
1233 ASSERT(map[i].br_startblock != DELAYSTARTBLOCK);
1234 ASSERT(map[i].br_blockcount);
1235
1236
1237 lblkno += map[i].br_blockcount;
1238
1239 if (map[i].br_startblock == HOLESTARTBLOCK)
1240 continue;
1241
1242 firstid = (xfs_dqid_t) map[i].br_startoff *
8a7b8a89 1243 mp->m_quotainfo->qi_dqperchunk;
1da177e4
LT
1244 /*
1245 * Do a read-ahead on the next extent.
1246 */
1247 if ((i+1 < nmaps) &&
1248 (map[i+1].br_startblock != HOLESTARTBLOCK)) {
1249 rablkcnt = map[i+1].br_blockcount;
1250 rablkno = map[i+1].br_startblock;
1251 while (rablkcnt--) {
1a1a3e97 1252 xfs_buf_readahead(mp->m_ddev_targp,
1da177e4 1253 XFS_FSB_TO_DADDR(mp, rablkno),
8a7b8a89 1254 mp->m_quotainfo->qi_dqchunklen);
1da177e4
LT
1255 rablkno++;
1256 }
1257 }
1258 /*
1259 * Iterate thru all the blks in the extent and
1260 * reset the counters of all the dquots inside them.
1261 */
1262 if ((error = xfs_qm_dqiter_bufs(mp,
1263 firstid,
1264 map[i].br_startblock,
1265 map[i].br_blockcount,
1266 flags))) {
1267 break;
1268 }
1269 }
1270
1271 if (error)
1272 break;
1273 } while (nmaps > 0);
1274
f0e2d93c 1275 kmem_free(map);
1da177e4 1276
014c2544 1277 return error;
1da177e4
LT
1278}
1279
1280/*
1281 * Called by dqusage_adjust in doing a quotacheck.
52fda114
CH
1282 *
1283 * Given the inode, and a dquot id this updates both the incore dqout as well
1284 * as the buffer copy. This is so that once the quotacheck is done, we can
1285 * just log all the buffers, as opposed to logging numerous updates to
1286 * individual dquots.
1da177e4 1287 */
52fda114 1288STATIC int
1da177e4 1289xfs_qm_quotacheck_dqadjust(
52fda114
CH
1290 struct xfs_inode *ip,
1291 xfs_dqid_t id,
1292 uint type,
1da177e4
LT
1293 xfs_qcnt_t nblks,
1294 xfs_qcnt_t rtblks)
1295{
52fda114
CH
1296 struct xfs_mount *mp = ip->i_mount;
1297 struct xfs_dquot *dqp;
1298 int error;
1299
1300 error = xfs_qm_dqget(mp, ip, id, type,
1301 XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, &dqp);
1302 if (error) {
1303 /*
1304 * Shouldn't be able to turn off quotas here.
1305 */
1306 ASSERT(error != ESRCH);
1307 ASSERT(error != ENOENT);
1308 return error;
1309 }
0b1b213f
CH
1310
1311 trace_xfs_dqadjust(dqp);
1312
1da177e4
LT
1313 /*
1314 * Adjust the inode count and the block count to reflect this inode's
1315 * resource usage.
1316 */
413d57c9 1317 be64_add_cpu(&dqp->q_core.d_icount, 1);
1da177e4
LT
1318 dqp->q_res_icount++;
1319 if (nblks) {
413d57c9 1320 be64_add_cpu(&dqp->q_core.d_bcount, nblks);
1da177e4
LT
1321 dqp->q_res_bcount += nblks;
1322 }
1323 if (rtblks) {
413d57c9 1324 be64_add_cpu(&dqp->q_core.d_rtbcount, rtblks);
1da177e4
LT
1325 dqp->q_res_rtbcount += rtblks;
1326 }
1327
1328 /*
1329 * Set default limits, adjust timers (since we changed usages)
191f8488
CH
1330 *
1331 * There are no timers for the default values set in the root dquot.
1da177e4 1332 */
191f8488 1333 if (dqp->q_core.d_id) {
52fda114
CH
1334 xfs_qm_adjust_dqlimits(mp, &dqp->q_core);
1335 xfs_qm_adjust_dqtimers(mp, &dqp->q_core);
1da177e4
LT
1336 }
1337
1338 dqp->dq_flags |= XFS_DQ_DIRTY;
52fda114
CH
1339 xfs_qm_dqput(dqp);
1340 return 0;
1da177e4
LT
1341}
1342
1343STATIC int
1344xfs_qm_get_rtblks(
1345 xfs_inode_t *ip,
1346 xfs_qcnt_t *O_rtblks)
1347{
1348 xfs_filblks_t rtblks; /* total rt blks */
4eea22f0 1349 xfs_extnum_t idx; /* extent record index */
1da177e4
LT
1350 xfs_ifork_t *ifp; /* inode fork pointer */
1351 xfs_extnum_t nextents; /* number of extent entries */
1da177e4
LT
1352 int error;
1353
1354 ASSERT(XFS_IS_REALTIME_INODE(ip));
1355 ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
1356 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
1357 if ((error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK)))
014c2544 1358 return error;
1da177e4
LT
1359 }
1360 rtblks = 0;
4eea22f0 1361 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
a6f64d4a
CH
1362 for (idx = 0; idx < nextents; idx++)
1363 rtblks += xfs_bmbt_get_blockcount(xfs_iext_get_ext(ifp, idx));
1da177e4 1364 *O_rtblks = (xfs_qcnt_t)rtblks;
014c2544 1365 return 0;
1da177e4
LT
1366}
1367
1368/*
1369 * callback routine supplied to bulkstat(). Given an inumber, find its
1370 * dquots and update them to account for resources taken by that inode.
1371 */
1372/* ARGSUSED */
1373STATIC int
1374xfs_qm_dqusage_adjust(
1375 xfs_mount_t *mp, /* mount point for filesystem */
1376 xfs_ino_t ino, /* inode number to get data for */
1377 void __user *buffer, /* not used */
1378 int ubsize, /* not used */
1da177e4 1379 int *ubused, /* not used */
1da177e4
LT
1380 int *res) /* result code value */
1381{
1382 xfs_inode_t *ip;
52fda114 1383 xfs_qcnt_t nblks, rtblks = 0;
1da177e4
LT
1384 int error;
1385
1386 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1387
1388 /*
1389 * rootino must have its resources accounted for, not so with the quota
1390 * inodes.
1391 */
1392 if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) {
1393 *res = BULKSTAT_RV_NOTHING;
1394 return XFS_ERROR(EINVAL);
1395 }
1396
1397 /*
1398 * We don't _need_ to take the ilock EXCL. However, the xfs_qm_dqget
1399 * interface expects the inode to be exclusively locked because that's
1400 * the case in all other instances. It's OK that we do this because
1401 * quotacheck is done only at mount time.
1402 */
52fda114
CH
1403 error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip);
1404 if (error) {
1da177e4 1405 *res = BULKSTAT_RV_NOTHING;
014c2544 1406 return error;
1da177e4
LT
1407 }
1408
52fda114 1409 ASSERT(ip->i_delayed_blks == 0);
1da177e4 1410
52fda114 1411 if (XFS_IS_REALTIME_INODE(ip)) {
1da177e4
LT
1412 /*
1413 * Walk thru the extent list and count the realtime blocks.
1414 */
52fda114
CH
1415 error = xfs_qm_get_rtblks(ip, &rtblks);
1416 if (error)
1417 goto error0;
1da177e4 1418 }
1da177e4 1419
52fda114 1420 nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks;
1da177e4
LT
1421
1422 /*
1423 * Add the (disk blocks and inode) resources occupied by this
1424 * inode to its dquots. We do this adjustment in the incore dquot,
1425 * and also copy the changes to its buffer.
1426 * We don't care about putting these changes in a transaction
1427 * envelope because if we crash in the middle of a 'quotacheck'
1428 * we have to start from the beginning anyway.
1429 * Once we're done, we'll log all the dquot bufs.
1430 *
c41564b5 1431 * The *QUOTA_ON checks below may look pretty racy, but quotachecks
1da177e4
LT
1432 * and quotaoffs don't race. (Quotachecks happen at mount time only).
1433 */
1434 if (XFS_IS_UQUOTA_ON(mp)) {
52fda114
CH
1435 error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_uid,
1436 XFS_DQ_USER, nblks, rtblks);
1437 if (error)
1438 goto error0;
1da177e4 1439 }
52fda114
CH
1440
1441 if (XFS_IS_GQUOTA_ON(mp)) {
1442 error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_gid,
1443 XFS_DQ_GROUP, nblks, rtblks);
1444 if (error)
1445 goto error0;
1da177e4 1446 }
1da177e4 1447
52fda114 1448 if (XFS_IS_PQUOTA_ON(mp)) {
6743099c 1449 error = xfs_qm_quotacheck_dqadjust(ip, xfs_get_projid(ip),
52fda114
CH
1450 XFS_DQ_PROJ, nblks, rtblks);
1451 if (error)
1452 goto error0;
1453 }
1454
1455 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1456 IRELE(ip);
1da177e4 1457 *res = BULKSTAT_RV_DIDONE;
014c2544 1458 return 0;
52fda114
CH
1459
1460error0:
1461 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1462 IRELE(ip);
1463 *res = BULKSTAT_RV_GIVEUP;
1464 return error;
1da177e4
LT
1465}
1466
1467/*
1468 * Walk thru all the filesystem inodes and construct a consistent view
1469 * of the disk quota world. If the quotacheck fails, disable quotas.
1470 */
1471int
1472xfs_qm_quotacheck(
1473 xfs_mount_t *mp)
1474{
1475 int done, count, error;
1476 xfs_ino_t lastino;
1477 size_t structsz;
1478 xfs_inode_t *uip, *gip;
1479 uint flags;
1480
1481 count = INT_MAX;
1482 structsz = 1;
1483 lastino = 0;
1484 flags = 0;
1485
8a7b8a89 1486 ASSERT(mp->m_quotainfo->qi_uquotaip || mp->m_quotainfo->qi_gquotaip);
1da177e4
LT
1487 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1488
1489 /*
1490 * There should be no cached dquots. The (simplistic) quotacheck
1491 * algorithm doesn't like that.
1492 */
3a25404b 1493 ASSERT(list_empty(&mp->m_quotainfo->qi_dqlist));
1da177e4 1494
0b932ccc 1495 xfs_notice(mp, "Quotacheck needed: Please wait.");
1da177e4
LT
1496
1497 /*
c8ad20ff 1498 * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset
1da177e4
LT
1499 * their counters to zero. We need a clean slate.
1500 * We don't log our changes till later.
1501 */
8a7b8a89
CH
1502 uip = mp->m_quotainfo->qi_uquotaip;
1503 if (uip) {
1504 error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA);
1505 if (error)
1da177e4
LT
1506 goto error_return;
1507 flags |= XFS_UQUOTA_CHKD;
1508 }
1509
8a7b8a89
CH
1510 gip = mp->m_quotainfo->qi_gquotaip;
1511 if (gip) {
1512 error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ?
1513 XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA);
1514 if (error)
1da177e4 1515 goto error_return;
c8ad20ff 1516 flags |= XFS_OQUOTA_CHKD;
1da177e4
LT
1517 }
1518
1519 do {
1520 /*
1521 * Iterate thru all the inodes in the file system,
1522 * adjusting the corresponding dquot counters in core.
1523 */
7dce11db
CH
1524 error = xfs_bulkstat(mp, &lastino, &count,
1525 xfs_qm_dqusage_adjust,
1526 structsz, NULL, &done);
1527 if (error)
1da177e4
LT
1528 break;
1529
7dce11db 1530 } while (!done);
1da177e4 1531
4b8879df
DC
1532 /*
1533 * We've made all the changes that we need to make incore.
1534 * Flush them down to disk buffers if everything was updated
1535 * successfully.
1536 */
1537 if (!error)
a7ef9bd7 1538 error = xfs_qm_dqflush_all(mp);
4b8879df 1539
1da177e4
LT
1540 /*
1541 * We can get this error if we couldn't do a dquot allocation inside
1542 * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the
1543 * dirty dquots that might be cached, we just want to get rid of them
1544 * and turn quotaoff. The dquots won't be attached to any of the inodes
1545 * at this point (because we intentionally didn't in dqget_noattach).
1546 */
1547 if (error) {
8112e9dc 1548 xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL);
1da177e4
LT
1549 goto error_return;
1550 }
1da177e4
LT
1551
1552 /*
1553 * We didn't log anything, because if we crashed, we'll have to
1554 * start the quotacheck from scratch anyway. However, we must make
1555 * sure that our dquot changes are secure before we put the
1556 * quotacheck'd stamp on the superblock. So, here we do a synchronous
1557 * flush.
1558 */
a9add83e 1559 xfs_flush_buftarg(mp->m_ddev_targp, 1);
1da177e4
LT
1560
1561 /*
1562 * If one type of quotas is off, then it will lose its
1563 * quotachecked status, since we won't be doing accounting for
1564 * that type anymore.
1565 */
c8ad20ff 1566 mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD);
1da177e4
LT
1567 mp->m_qflags |= flags;
1568
1da177e4
LT
1569 error_return:
1570 if (error) {
0b932ccc
DC
1571 xfs_warn(mp,
1572 "Quotacheck: Unsuccessful (Error %d): Disabling quotas.",
1573 error);
1da177e4
LT
1574 /*
1575 * We must turn off quotas.
1576 */
1577 ASSERT(mp->m_quotainfo != NULL);
1578 ASSERT(xfs_Gqm != NULL);
1579 xfs_qm_destroy_quotainfo(mp);
31d5577b 1580 if (xfs_mount_reset_sbqflags(mp)) {
0b932ccc
DC
1581 xfs_warn(mp,
1582 "Quotacheck: Failed to reset quota flags.");
31d5577b 1583 }
0b932ccc
DC
1584 } else
1585 xfs_notice(mp, "Quotacheck: Done.");
1da177e4
LT
1586 return (error);
1587}
1588
1589/*
1590 * This is called after the superblock has been read in and we're ready to
1591 * iget the quota inodes.
1592 */
1593STATIC int
1594xfs_qm_init_quotainos(
1595 xfs_mount_t *mp)
1596{
1597 xfs_inode_t *uip, *gip;
1598 int error;
1599 __int64_t sbflags;
1600 uint flags;
1601
1602 ASSERT(mp->m_quotainfo);
1603 uip = gip = NULL;
1604 sbflags = 0;
1605 flags = 0;
1606
1607 /*
1608 * Get the uquota and gquota inodes
1609 */
62118709 1610 if (xfs_sb_version_hasquota(&mp->m_sb)) {
1da177e4
LT
1611 if (XFS_IS_UQUOTA_ON(mp) &&
1612 mp->m_sb.sb_uquotino != NULLFSINO) {
1613 ASSERT(mp->m_sb.sb_uquotino > 0);
1614 if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
7b6259e7 1615 0, 0, &uip)))
1da177e4
LT
1616 return XFS_ERROR(error);
1617 }
c8ad20ff 1618 if (XFS_IS_OQUOTA_ON(mp) &&
1da177e4
LT
1619 mp->m_sb.sb_gquotino != NULLFSINO) {
1620 ASSERT(mp->m_sb.sb_gquotino > 0);
1621 if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
7b6259e7 1622 0, 0, &gip))) {
1da177e4 1623 if (uip)
43355099 1624 IRELE(uip);
1da177e4
LT
1625 return XFS_ERROR(error);
1626 }
1627 }
1628 } else {
1629 flags |= XFS_QMOPT_SBVERSION;
1630 sbflags |= (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
1631 XFS_SB_GQUOTINO | XFS_SB_QFLAGS);
1632 }
1633
1634 /*
1635 * Create the two inodes, if they don't exist already. The changes
1636 * made above will get added to a transaction and logged in one of
1637 * the qino_alloc calls below. If the device is readonly,
1638 * temporarily switch to read-write to do this.
1639 */
1640 if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) {
1641 if ((error = xfs_qm_qino_alloc(mp, &uip,
1642 sbflags | XFS_SB_UQUOTINO,
1643 flags | XFS_QMOPT_UQUOTA)))
1644 return XFS_ERROR(error);
1645
1646 flags &= ~XFS_QMOPT_SBVERSION;
1647 }
c8ad20ff
NS
1648 if (XFS_IS_OQUOTA_ON(mp) && gip == NULL) {
1649 flags |= (XFS_IS_GQUOTA_ON(mp) ?
1650 XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA);
1651 error = xfs_qm_qino_alloc(mp, &gip,
1652 sbflags | XFS_SB_GQUOTINO, flags);
1653 if (error) {
1da177e4 1654 if (uip)
43355099 1655 IRELE(uip);
1da177e4
LT
1656
1657 return XFS_ERROR(error);
1658 }
1659 }
1660
8a7b8a89
CH
1661 mp->m_quotainfo->qi_uquotaip = uip;
1662 mp->m_quotainfo->qi_gquotaip = gip;
1da177e4 1663
014c2544 1664 return 0;
1da177e4
LT
1665}
1666
1667
368e1361 1668
1da177e4 1669/*
368e1361
DC
1670 * Just pop the least recently used dquot off the freelist and
1671 * recycle it. The returned dquot is locked.
1da177e4 1672 */
368e1361
DC
1673STATIC xfs_dquot_t *
1674xfs_qm_dqreclaim_one(void)
1da177e4 1675{
368e1361
DC
1676 xfs_dquot_t *dqpout;
1677 xfs_dquot_t *dqp;
1da177e4 1678 int restarts;
0fbca4d1 1679 int startagain;
1da177e4 1680
1da177e4 1681 restarts = 0;
368e1361 1682 dqpout = NULL;
1da177e4 1683
368e1361 1684 /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
0fbca4d1
DC
1685again:
1686 startagain = 0;
3a8406f6 1687 mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
1da177e4 1688
3a8406f6 1689 list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) {
3a25404b 1690 struct xfs_mount *mp = dqp->q_mount;
1da177e4
LT
1691 xfs_dqlock(dqp);
1692
1693 /*
be7ffc38
CH
1694 * This dquot has already been grabbed by dqlookup.
1695 * Remove it from the freelist and try again.
1da177e4 1696 */
be7ffc38 1697 if (dqp->q_nrefs) {
368e1361 1698 trace_xfs_dqreclaim_want(dqp);
1da177e4 1699 XQM_STATS_INC(xqmstats.xs_qm_dqwants);
be7ffc38
CH
1700
1701 list_del_init(&dqp->q_freelist);
1702 xfs_Gqm->qm_dqfrlist_cnt--;
0fbca4d1
DC
1703 restarts++;
1704 startagain = 1;
1705 goto dqunlock;
1da177e4
LT
1706 }
1707
368e1361 1708 ASSERT(dqp->q_hash);
3a25404b 1709 ASSERT(!list_empty(&dqp->q_mplist));
368e1361 1710
1da177e4 1711 /*
0fbca4d1
DC
1712 * Try to grab the flush lock. If this dquot is in the process
1713 * of getting flushed to disk, we don't want to reclaim it.
1da177e4 1714 */
0fbca4d1
DC
1715 if (!xfs_dqflock_nowait(dqp))
1716 goto dqunlock;
1da177e4
LT
1717
1718 /*
1719 * We have the flush lock so we know that this is not in the
1720 * process of being flushed. So, if this is dirty, flush it
1721 * DELWRI so that we don't get a freelist infested with
1722 * dirty dquots.
1723 */
1724 if (XFS_DQ_IS_DIRTY(dqp)) {
3c56836f 1725 int error;
0b1b213f 1726
368e1361 1727 trace_xfs_dqreclaim_dirty(dqp);
0b1b213f 1728
1da177e4
LT
1729 /*
1730 * We flush it delayed write, so don't bother
368e1361 1731 * releasing the freelist lock.
1da177e4 1732 */
fdedf28b 1733 error = xfs_qm_dqflush(dqp, SYNC_TRYLOCK);
3c56836f 1734 if (error) {
53487786
DC
1735 xfs_warn(mp, "%s: dquot %p flush failed",
1736 __func__, dqp);
3c56836f 1737 }
0fbca4d1 1738 goto dqunlock;
1da177e4 1739 }
368e1361 1740
1da177e4
LT
1741 /*
1742 * We're trying to get the hashlock out of order. This races
1743 * with dqlookup; so, we giveup and goto the next dquot if
1744 * we couldn't get the hashlock. This way, we won't starve
1745 * a dqlookup process that holds the hashlock that is
1746 * waiting for the freelist lock.
1747 */
c9a192dc 1748 if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
368e1361
DC
1749 restarts++;
1750 goto dqfunlock;
1da177e4 1751 }
368e1361 1752
1da177e4
LT
1753 /*
1754 * This races with dquot allocation code as well as dqflush_all
1755 * and reclaim code. So, if we failed to grab the mplist lock,
1756 * giveup everything and start over.
1757 */
3a25404b 1758 if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) {
368e1361 1759 restarts++;
0fbca4d1
DC
1760 startagain = 1;
1761 goto qhunlock;
1da177e4 1762 }
0b1b213f 1763
1da177e4 1764 ASSERT(dqp->q_nrefs == 0);
3a25404b
DC
1765 list_del_init(&dqp->q_mplist);
1766 mp->m_quotainfo->qi_dquots--;
1767 mp->m_quotainfo->qi_dqreclaims++;
e6a81f13
DC
1768 list_del_init(&dqp->q_hashlist);
1769 dqp->q_hash->qh_version++;
3a8406f6
DC
1770 list_del_init(&dqp->q_freelist);
1771 xfs_Gqm->qm_dqfrlist_cnt--;
368e1361
DC
1772 dqpout = dqp;
1773 mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
0fbca4d1 1774qhunlock:
368e1361
DC
1775 mutex_unlock(&dqp->q_hash->qh_lock);
1776dqfunlock:
1777 xfs_dqfunlock(dqp);
0fbca4d1 1778dqunlock:
1da177e4 1779 xfs_dqunlock(dqp);
368e1361
DC
1780 if (dqpout)
1781 break;
1782 if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
0fbca4d1
DC
1783 break;
1784 if (startagain) {
1785 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
1786 goto again;
1787 }
1da177e4 1788 }
3a8406f6 1789 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
368e1361 1790 return dqpout;
1da177e4
LT
1791}
1792
368e1361
DC
1793/*
1794 * Traverse the freelist of dquots and attempt to reclaim a maximum of
1795 * 'howmany' dquots. This operation races with dqlookup(), and attempts to
1796 * favor the lookup function ...
1797 */
1798STATIC int
1799xfs_qm_shake_freelist(
1800 int howmany)
1801{
1802 int nreclaimed = 0;
1803 xfs_dquot_t *dqp;
1804
1805 if (howmany <= 0)
1806 return 0;
1807
1808 while (nreclaimed < howmany) {
1809 dqp = xfs_qm_dqreclaim_one();
1810 if (!dqp)
1811 return nreclaimed;
1812 xfs_qm_dqdestroy(dqp);
1813 nreclaimed++;
1814 }
1815 return nreclaimed;
1816}
1da177e4
LT
1817
1818/*
1819 * The kmem_shake interface is invoked when memory is running low.
1820 */
1821/* ARGSUSED */
1822STATIC int
7f8275d0
DC
1823xfs_qm_shake(
1824 struct shrinker *shrink,
1495f230 1825 struct shrink_control *sc)
1da177e4
LT
1826{
1827 int ndqused, nfree, n;
1495f230 1828 gfp_t gfp_mask = sc->gfp_mask;
1da177e4
LT
1829
1830 if (!kmem_shake_allow(gfp_mask))
014c2544 1831 return 0;
1da177e4 1832 if (!xfs_Gqm)
014c2544 1833 return 0;
1da177e4 1834
3a8406f6 1835 nfree = xfs_Gqm->qm_dqfrlist_cnt; /* free dquots */
1da177e4
LT
1836 /* incore dquots in all f/s's */
1837 ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree;
1838
1839 ASSERT(ndqused >= 0);
1840
1841 if (nfree <= ndqused && nfree < ndquot)
014c2544 1842 return 0;
1da177e4
LT
1843
1844 ndqused *= xfs_Gqm->qm_dqfree_ratio; /* target # of free dquots */
1845 n = nfree - ndqused - ndquot; /* # over target */
1846
1847 return xfs_qm_shake_freelist(MAX(nfree, n));
1848}
1849
1850
1da177e4
LT
1851/*------------------------------------------------------------------*/
1852
1853/*
1854 * Return a new incore dquot. Depending on the number of
1855 * dquots in the system, we either allocate a new one on the kernel heap,
1856 * or reclaim a free one.
1857 * Return value is B_TRUE if we allocated a new dquot, B_FALSE if we managed
1858 * to reclaim an existing one from the freelist.
1859 */
1860boolean_t
1861xfs_qm_dqalloc_incore(
1862 xfs_dquot_t **O_dqpp)
1863{
1864 xfs_dquot_t *dqp;
1865
1866 /*
1867 * Check against high water mark to see if we want to pop
1868 * a nincompoop dquot off the freelist.
1869 */
1870 if (atomic_read(&xfs_Gqm->qm_totaldquots) >= ndquot) {
1871 /*
1872 * Try to recycle a dquot from the freelist.
1873 */
1874 if ((dqp = xfs_qm_dqreclaim_one())) {
1875 XQM_STATS_INC(xqmstats.xs_qm_dqreclaims);
1876 /*
1877 * Just zero the core here. The rest will get
1878 * reinitialized by caller. XXX we shouldn't even
1879 * do this zero ...
1880 */
1881 memset(&dqp->q_core, 0, sizeof(dqp->q_core));
1882 *O_dqpp = dqp;
014c2544 1883 return B_FALSE;
1da177e4
LT
1884 }
1885 XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
1886 }
1887
1888 /*
1889 * Allocate a brand new dquot on the kernel heap and return it
1890 * to the caller to initialize.
1891 */
1892 ASSERT(xfs_Gqm->qm_dqzone != NULL);
1893 *O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP);
1894 atomic_inc(&xfs_Gqm->qm_totaldquots);
1895
014c2544 1896 return B_TRUE;
1da177e4
LT
1897}
1898
1899
1900/*
1901 * Start a transaction and write the incore superblock changes to
1902 * disk. flags parameter indicates which fields have changed.
1903 */
1904int
1905xfs_qm_write_sb_changes(
1906 xfs_mount_t *mp,
1907 __int64_t flags)
1908{
1909 xfs_trans_t *tp;
1910 int error;
1911
1da177e4
LT
1912 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
1913 if ((error = xfs_trans_reserve(tp, 0,
1914 mp->m_sb.sb_sectsize + 128, 0,
1915 0,
1916 XFS_DEFAULT_LOG_COUNT))) {
1917 xfs_trans_cancel(tp, 0);
014c2544 1918 return error;
1da177e4
LT
1919 }
1920
1921 xfs_mod_sb(tp, flags);
e5720eec 1922 error = xfs_trans_commit(tp, 0);
1da177e4 1923
e5720eec 1924 return error;
1da177e4
LT
1925}
1926
1927
1928/* --------------- utility functions for vnodeops ---------------- */
1929
1930
1931/*
6c77b0ea 1932 * Given an inode, a uid, gid and prid make sure that we have
1da177e4
LT
1933 * allocated relevant dquot(s) on disk, and that we won't exceed inode
1934 * quotas by creating this file.
1935 * This also attaches dquot(s) to the given inode after locking it,
1936 * and returns the dquots corresponding to the uid and/or gid.
1937 *
1938 * in : inode (unlocked)
1939 * out : udquot, gdquot with references taken and unlocked
1940 */
1941int
1942xfs_qm_vop_dqalloc(
7d095257
CH
1943 struct xfs_inode *ip,
1944 uid_t uid,
1945 gid_t gid,
1946 prid_t prid,
1947 uint flags,
1948 struct xfs_dquot **O_udqpp,
1949 struct xfs_dquot **O_gdqpp)
1da177e4 1950{
7d095257
CH
1951 struct xfs_mount *mp = ip->i_mount;
1952 struct xfs_dquot *uq, *gq;
1953 int error;
1954 uint lockflags;
1da177e4 1955
7d095257 1956 if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
1da177e4
LT
1957 return 0;
1958
1959 lockflags = XFS_ILOCK_EXCL;
1960 xfs_ilock(ip, lockflags);
1961
bd186aa9 1962 if ((flags & XFS_QMOPT_INHERIT) && XFS_INHERIT_GID(ip))
1da177e4
LT
1963 gid = ip->i_d.di_gid;
1964
1965 /*
1966 * Attach the dquot(s) to this inode, doing a dquot allocation
1967 * if necessary. The dquot(s) will not be locked.
1968 */
1969 if (XFS_NOT_DQATTACHED(mp, ip)) {
7d095257
CH
1970 error = xfs_qm_dqattach_locked(ip, XFS_QMOPT_DQALLOC);
1971 if (error) {
1da177e4 1972 xfs_iunlock(ip, lockflags);
014c2544 1973 return error;
1da177e4
LT
1974 }
1975 }
1976
1977 uq = gq = NULL;
c8ad20ff 1978 if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) {
1da177e4
LT
1979 if (ip->i_d.di_uid != uid) {
1980 /*
1981 * What we need is the dquot that has this uid, and
1982 * if we send the inode to dqget, the uid of the inode
1983 * takes priority over what's sent in the uid argument.
1984 * We must unlock inode here before calling dqget if
1985 * we're not sending the inode, because otherwise
1986 * we'll deadlock by doing trans_reserve while
1987 * holding ilock.
1988 */
1989 xfs_iunlock(ip, lockflags);
1990 if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid,
1991 XFS_DQ_USER,
1992 XFS_QMOPT_DQALLOC |
1993 XFS_QMOPT_DOWARN,
1994 &uq))) {
1995 ASSERT(error != ENOENT);
014c2544 1996 return error;
1da177e4
LT
1997 }
1998 /*
1999 * Get the ilock in the right order.
2000 */
2001 xfs_dqunlock(uq);
2002 lockflags = XFS_ILOCK_SHARED;
2003 xfs_ilock(ip, lockflags);
2004 } else {
2005 /*
2006 * Take an extra reference, because we'll return
2007 * this to caller
2008 */
2009 ASSERT(ip->i_udquot);
2010 uq = ip->i_udquot;
2011 xfs_dqlock(uq);
2012 XFS_DQHOLD(uq);
2013 xfs_dqunlock(uq);
2014 }
2015 }
c8ad20ff 2016 if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
1da177e4
LT
2017 if (ip->i_d.di_gid != gid) {
2018 xfs_iunlock(ip, lockflags);
2019 if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid,
2020 XFS_DQ_GROUP,
2021 XFS_QMOPT_DQALLOC |
2022 XFS_QMOPT_DOWARN,
2023 &gq))) {
2024 if (uq)
2025 xfs_qm_dqrele(uq);
2026 ASSERT(error != ENOENT);
014c2544 2027 return error;
1da177e4
LT
2028 }
2029 xfs_dqunlock(gq);
2030 lockflags = XFS_ILOCK_SHARED;
2031 xfs_ilock(ip, lockflags);
2032 } else {
2033 ASSERT(ip->i_gdquot);
2034 gq = ip->i_gdquot;
2035 xfs_dqlock(gq);
2036 XFS_DQHOLD(gq);
2037 xfs_dqunlock(gq);
2038 }
c8ad20ff 2039 } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
6743099c 2040 if (xfs_get_projid(ip) != prid) {
c8ad20ff
NS
2041 xfs_iunlock(ip, lockflags);
2042 if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid,
2043 XFS_DQ_PROJ,
2044 XFS_QMOPT_DQALLOC |
2045 XFS_QMOPT_DOWARN,
2046 &gq))) {
2047 if (uq)
2048 xfs_qm_dqrele(uq);
2049 ASSERT(error != ENOENT);
2050 return (error);
2051 }
2052 xfs_dqunlock(gq);
2053 lockflags = XFS_ILOCK_SHARED;
2054 xfs_ilock(ip, lockflags);
2055 } else {
2056 ASSERT(ip->i_gdquot);
2057 gq = ip->i_gdquot;
2058 xfs_dqlock(gq);
2059 XFS_DQHOLD(gq);
2060 xfs_dqunlock(gq);
2061 }
1da177e4
LT
2062 }
2063 if (uq)
0b1b213f 2064 trace_xfs_dquot_dqalloc(ip);
1da177e4
LT
2065
2066 xfs_iunlock(ip, lockflags);
2067 if (O_udqpp)
2068 *O_udqpp = uq;
2069 else if (uq)
2070 xfs_qm_dqrele(uq);
2071 if (O_gdqpp)
2072 *O_gdqpp = gq;
2073 else if (gq)
2074 xfs_qm_dqrele(gq);
014c2544 2075 return 0;
1da177e4
LT
2076}
2077
2078/*
2079 * Actually transfer ownership, and do dquot modifications.
2080 * These were already reserved.
2081 */
2082xfs_dquot_t *
2083xfs_qm_vop_chown(
2084 xfs_trans_t *tp,
2085 xfs_inode_t *ip,
2086 xfs_dquot_t **IO_olddq,
2087 xfs_dquot_t *newdq)
2088{
2089 xfs_dquot_t *prevdq;
06d10dd9
NS
2090 uint bfield = XFS_IS_REALTIME_INODE(ip) ?
2091 XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
2092
7d095257 2093
579aa9ca 2094 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1da177e4
LT
2095 ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
2096
2097 /* old dquot */
2098 prevdq = *IO_olddq;
2099 ASSERT(prevdq);
2100 ASSERT(prevdq != newdq);
2101
06d10dd9
NS
2102 xfs_trans_mod_dquot(tp, prevdq, bfield, -(ip->i_d.di_nblocks));
2103 xfs_trans_mod_dquot(tp, prevdq, XFS_TRANS_DQ_ICOUNT, -1);
1da177e4
LT
2104
2105 /* the sparkling new dquot */
06d10dd9
NS
2106 xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_d.di_nblocks);
2107 xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1);
1da177e4
LT
2108
2109 /*
2110 * Take an extra reference, because the inode
2111 * is going to keep this dquot pointer even
2112 * after the trans_commit.
2113 */
2114 xfs_dqlock(newdq);
2115 XFS_DQHOLD(newdq);
2116 xfs_dqunlock(newdq);
2117 *IO_olddq = newdq;
2118
014c2544 2119 return prevdq;
1da177e4
LT
2120}
2121
2122/*
c8ad20ff 2123 * Quota reservations for setattr(AT_UID|AT_GID|AT_PROJID).
1da177e4
LT
2124 */
2125int
2126xfs_qm_vop_chown_reserve(
2127 xfs_trans_t *tp,
2128 xfs_inode_t *ip,
2129 xfs_dquot_t *udqp,
2130 xfs_dquot_t *gdqp,
2131 uint flags)
2132{
7d095257 2133 xfs_mount_t *mp = ip->i_mount;
9a2a7de2 2134 uint delblks, blkflags, prjflags = 0;
1da177e4 2135 xfs_dquot_t *unresudq, *unresgdq, *delblksudq, *delblksgdq;
7d095257
CH
2136 int error;
2137
1da177e4 2138
579aa9ca 2139 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
1da177e4
LT
2140 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
2141
2142 delblks = ip->i_delayed_blks;
2143 delblksudq = delblksgdq = unresudq = unresgdq = NULL;
06d10dd9
NS
2144 blkflags = XFS_IS_REALTIME_INODE(ip) ?
2145 XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS;
1da177e4
LT
2146
2147 if (XFS_IS_UQUOTA_ON(mp) && udqp &&
1149d96a 2148 ip->i_d.di_uid != (uid_t)be32_to_cpu(udqp->q_core.d_id)) {
1da177e4
LT
2149 delblksudq = udqp;
2150 /*
2151 * If there are delayed allocation blocks, then we have to
2152 * unreserve those from the old dquot, and add them to the
2153 * new dquot.
2154 */
2155 if (delblks) {
2156 ASSERT(ip->i_udquot);
2157 unresudq = ip->i_udquot;
2158 }
2159 }
c8ad20ff 2160 if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) {
9a2a7de2 2161 if (XFS_IS_PQUOTA_ON(ip->i_mount) &&
6743099c 2162 xfs_get_projid(ip) != be32_to_cpu(gdqp->q_core.d_id))
9a2a7de2
NS
2163 prjflags = XFS_QMOPT_ENOSPC;
2164
2165 if (prjflags ||
2166 (XFS_IS_GQUOTA_ON(ip->i_mount) &&
2167 ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id))) {
c8ad20ff
NS
2168 delblksgdq = gdqp;
2169 if (delblks) {
2170 ASSERT(ip->i_gdquot);
2171 unresgdq = ip->i_gdquot;
2172 }
1da177e4
LT
2173 }
2174 }
2175
2176 if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount,
2177 delblksudq, delblksgdq, ip->i_d.di_nblocks, 1,
9a2a7de2 2178 flags | blkflags | prjflags)))
1da177e4
LT
2179 return (error);
2180
2181 /*
2182 * Do the delayed blks reservations/unreservations now. Since, these
2183 * are done without the help of a transaction, if a reservation fails
2184 * its previous reservations won't be automatically undone by trans
2185 * code. So, we have to do it manually here.
2186 */
2187 if (delblks) {
2188 /*
2189 * Do the reservations first. Unreservation can't fail.
2190 */
2191 ASSERT(delblksudq || delblksgdq);
2192 ASSERT(unresudq || unresgdq);
2193 if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
2194 delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0,
9a2a7de2 2195 flags | blkflags | prjflags)))
1da177e4
LT
2196 return (error);
2197 xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
2198 unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0,
06d10dd9 2199 blkflags);
1da177e4
LT
2200 }
2201
2202 return (0);
2203}
2204
2205int
2206xfs_qm_vop_rename_dqattach(
7d095257 2207 struct xfs_inode **i_tab)
1da177e4 2208{
7d095257
CH
2209 struct xfs_mount *mp = i_tab[0]->i_mount;
2210 int i;
1da177e4 2211
7d095257 2212 if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
014c2544 2213 return 0;
1da177e4 2214
7d095257
CH
2215 for (i = 0; (i < 4 && i_tab[i]); i++) {
2216 struct xfs_inode *ip = i_tab[i];
2217 int error;
2218
1da177e4
LT
2219 /*
2220 * Watch out for duplicate entries in the table.
2221 */
7d095257
CH
2222 if (i == 0 || ip != i_tab[i-1]) {
2223 if (XFS_NOT_DQATTACHED(mp, ip)) {
1da177e4
LT
2224 error = xfs_qm_dqattach(ip, 0);
2225 if (error)
014c2544 2226 return error;
1da177e4
LT
2227 }
2228 }
2229 }
014c2544 2230 return 0;
1da177e4
LT
2231}
2232
2233void
7d095257
CH
2234xfs_qm_vop_create_dqattach(
2235 struct xfs_trans *tp,
2236 struct xfs_inode *ip,
2237 struct xfs_dquot *udqp,
2238 struct xfs_dquot *gdqp)
1da177e4 2239{
7d095257
CH
2240 struct xfs_mount *mp = tp->t_mountp;
2241
2242 if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
1da177e4
LT
2243 return;
2244
579aa9ca 2245 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
7d095257 2246 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1da177e4
LT
2247
2248 if (udqp) {
2249 xfs_dqlock(udqp);
2250 XFS_DQHOLD(udqp);
2251 xfs_dqunlock(udqp);
2252 ASSERT(ip->i_udquot == NULL);
2253 ip->i_udquot = udqp;
7d095257 2254 ASSERT(XFS_IS_UQUOTA_ON(mp));
1149d96a 2255 ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
1da177e4
LT
2256 xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
2257 }
2258 if (gdqp) {
2259 xfs_dqlock(gdqp);
2260 XFS_DQHOLD(gdqp);
2261 xfs_dqunlock(gdqp);
2262 ASSERT(ip->i_gdquot == NULL);
2263 ip->i_gdquot = gdqp;
7d095257
CH
2264 ASSERT(XFS_IS_OQUOTA_ON(mp));
2265 ASSERT((XFS_IS_GQUOTA_ON(mp) ?
6743099c 2266 ip->i_d.di_gid : xfs_get_projid(ip)) ==
ee2a4f7c 2267 be32_to_cpu(gdqp->q_core.d_id));
1da177e4
LT
2268 xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
2269 }
2270}
2271