]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blob - fs/nfs/nfs4filelayoutdev.c
NFSv4.1: filelayout i/o helpers
[mirror_ubuntu-bionic-kernel.git] / fs / nfs / nfs4filelayoutdev.c
1 /*
2 * Device operations for the pnfs nfs4 file layout driver.
3 *
4 * Copyright (c) 2002
5 * The Regents of the University of Michigan
6 * All Rights Reserved
7 *
8 * Dean Hildebrand <dhildebz@umich.edu>
9 * Garth Goodson <Garth.Goodson@netapp.com>
10 *
11 * Permission is granted to use, copy, create derivative works, and
12 * redistribute this software and such derivative works for any purpose,
13 * so long as the name of the University of Michigan is not used in
14 * any advertising or publicity pertaining to the use or distribution
15 * of this software without specific, written prior authorization. If
16 * the above copyright notice or any other identification of the
17 * University of Michigan is included in any copy of any portion of
18 * this software, then the disclaimer below must also be included.
19 *
20 * This software is provided as is, without representation or warranty
21 * of any kind either express or implied, including without limitation
22 * the implied warranties of merchantability, fitness for a particular
23 * purpose, or noninfringement. The Regents of the University of
24 * Michigan shall not be liable for any damages, including special,
25 * indirect, incidental, or consequential damages, with respect to any
26 * claim arising out of or in connection with the use of the software,
27 * even if it has been or is hereafter advised of the possibility of
28 * such damages.
29 */
30
31 #include <linux/nfs_fs.h>
32 #include <linux/vmalloc.h>
33
34 #include "internal.h"
35 #include "nfs4filelayout.h"
36
37 #define NFSDBG_FACILITY NFSDBG_PNFS_LD
38
39 /*
40 * Data server cache
41 *
42 * Data servers can be mapped to different device ids.
43 * nfs4_pnfs_ds reference counting
44 * - set to 1 on allocation
45 * - incremented when a device id maps a data server already in the cache.
46 * - decremented when deviceid is removed from the cache.
47 */
48 DEFINE_SPINLOCK(nfs4_ds_cache_lock);
49 static LIST_HEAD(nfs4_data_server_cache);
50
51 /* Debug routines */
52 void
53 print_ds(struct nfs4_pnfs_ds *ds)
54 {
55 if (ds == NULL) {
56 printk("%s NULL device\n", __func__);
57 return;
58 }
59 printk(" ip_addr %x port %hu\n"
60 " ref count %d\n"
61 " client %p\n"
62 " cl_exchange_flags %x\n",
63 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
64 atomic_read(&ds->ds_count), ds->ds_clp,
65 ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
66 }
67
68 void
69 print_ds_list(struct nfs4_file_layout_dsaddr *dsaddr)
70 {
71 int i;
72
73 ifdebug(FACILITY) {
74 printk("%s dsaddr->ds_num %d\n", __func__,
75 dsaddr->ds_num);
76 for (i = 0; i < dsaddr->ds_num; i++)
77 print_ds(dsaddr->ds_list[i]);
78 }
79 }
80
81 void print_deviceid(struct nfs4_deviceid *id)
82 {
83 u32 *p = (u32 *)id;
84
85 dprintk("%s: device id= [%x%x%x%x]\n", __func__,
86 p[0], p[1], p[2], p[3]);
87 }
88
89 /* nfs4_ds_cache_lock is held */
90 static struct nfs4_pnfs_ds *
91 _data_server_lookup_locked(u32 ip_addr, u32 port)
92 {
93 struct nfs4_pnfs_ds *ds;
94
95 dprintk("_data_server_lookup: ip_addr=%x port=%hu\n",
96 ntohl(ip_addr), ntohs(port));
97
98 list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) {
99 if (ds->ds_ip_addr == ip_addr &&
100 ds->ds_port == port) {
101 return ds;
102 }
103 }
104 return NULL;
105 }
106
107 /*
108 * Create an rpc connection to the nfs4_pnfs_ds data server
109 * Currently only support IPv4
110 */
111 static int
112 nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
113 {
114 struct nfs_client *clp;
115 struct sockaddr_in sin;
116 int status = 0;
117
118 dprintk("--> %s ip:port %x:%hu au_flavor %d\n", __func__,
119 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
120 mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor);
121
122 sin.sin_family = AF_INET;
123 sin.sin_addr.s_addr = ds->ds_ip_addr;
124 sin.sin_port = ds->ds_port;
125
126 clp = nfs4_set_ds_client(mds_srv->nfs_client, (struct sockaddr *)&sin,
127 sizeof(sin), IPPROTO_TCP);
128 if (IS_ERR(clp)) {
129 status = PTR_ERR(clp);
130 goto out;
131 }
132
133 if ((clp->cl_exchange_flags & EXCHGID4_FLAG_MASK_PNFS) != 0) {
134 if (!is_ds_client(clp)) {
135 status = -ENODEV;
136 goto out_put;
137 }
138 ds->ds_clp = clp;
139 dprintk("%s [existing] ip=%x, port=%hu\n", __func__,
140 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
141 goto out;
142 }
143
144 /*
145 * Do not set NFS_CS_CHECK_LEASE_TIME instead set the DS lease to
146 * be equal to the MDS lease. Renewal is scheduled in create_session.
147 */
148 spin_lock(&mds_srv->nfs_client->cl_lock);
149 clp->cl_lease_time = mds_srv->nfs_client->cl_lease_time;
150 spin_unlock(&mds_srv->nfs_client->cl_lock);
151 clp->cl_last_renewal = jiffies;
152
153 /* New nfs_client */
154 status = nfs4_init_ds_session(clp);
155 if (status)
156 goto out_put;
157
158 ds->ds_clp = clp;
159 dprintk("%s [new] ip=%x, port=%hu\n", __func__, ntohl(ds->ds_ip_addr),
160 ntohs(ds->ds_port));
161 out:
162 return status;
163 out_put:
164 nfs_put_client(clp);
165 goto out;
166 }
167
168 static void
169 destroy_ds(struct nfs4_pnfs_ds *ds)
170 {
171 dprintk("--> %s\n", __func__);
172 ifdebug(FACILITY)
173 print_ds(ds);
174
175 if (ds->ds_clp)
176 nfs_put_client(ds->ds_clp);
177 kfree(ds);
178 }
179
180 static void
181 nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
182 {
183 struct nfs4_pnfs_ds *ds;
184 int i;
185
186 print_deviceid(&dsaddr->deviceid.de_id);
187
188 for (i = 0; i < dsaddr->ds_num; i++) {
189 ds = dsaddr->ds_list[i];
190 if (ds != NULL) {
191 if (atomic_dec_and_lock(&ds->ds_count,
192 &nfs4_ds_cache_lock)) {
193 list_del_init(&ds->ds_node);
194 spin_unlock(&nfs4_ds_cache_lock);
195 destroy_ds(ds);
196 }
197 }
198 }
199 kfree(dsaddr->stripe_indices);
200 kfree(dsaddr);
201 }
202
203 void
204 nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *device)
205 {
206 struct nfs4_file_layout_dsaddr *dsaddr =
207 container_of(device, struct nfs4_file_layout_dsaddr, deviceid);
208
209 nfs4_fl_free_deviceid(dsaddr);
210 }
211
212 static struct nfs4_pnfs_ds *
213 nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port)
214 {
215 struct nfs4_pnfs_ds *tmp_ds, *ds;
216
217 ds = kzalloc(sizeof(*tmp_ds), GFP_KERNEL);
218 if (!ds)
219 goto out;
220
221 spin_lock(&nfs4_ds_cache_lock);
222 tmp_ds = _data_server_lookup_locked(ip_addr, port);
223 if (tmp_ds == NULL) {
224 ds->ds_ip_addr = ip_addr;
225 ds->ds_port = port;
226 atomic_set(&ds->ds_count, 1);
227 INIT_LIST_HEAD(&ds->ds_node);
228 ds->ds_clp = NULL;
229 list_add(&ds->ds_node, &nfs4_data_server_cache);
230 dprintk("%s add new data server ip 0x%x\n", __func__,
231 ds->ds_ip_addr);
232 } else {
233 kfree(ds);
234 atomic_inc(&tmp_ds->ds_count);
235 dprintk("%s data server found ip 0x%x, inc'ed ds_count to %d\n",
236 __func__, tmp_ds->ds_ip_addr,
237 atomic_read(&tmp_ds->ds_count));
238 ds = tmp_ds;
239 }
240 spin_unlock(&nfs4_ds_cache_lock);
241 out:
242 return ds;
243 }
244
245 /*
246 * Currently only support ipv4, and one multi-path address.
247 */
248 static struct nfs4_pnfs_ds *
249 decode_and_add_ds(__be32 **pp, struct inode *inode)
250 {
251 struct nfs4_pnfs_ds *ds = NULL;
252 char *buf;
253 const char *ipend, *pstr;
254 u32 ip_addr, port;
255 int nlen, rlen, i;
256 int tmp[2];
257 __be32 *r_netid, *r_addr, *p = *pp;
258
259 /* r_netid */
260 nlen = be32_to_cpup(p++);
261 r_netid = p;
262 p += XDR_QUADLEN(nlen);
263
264 /* r_addr */
265 rlen = be32_to_cpup(p++);
266 r_addr = p;
267 p += XDR_QUADLEN(rlen);
268 *pp = p;
269
270 /* Check that netid is "tcp" */
271 if (nlen != 3 || memcmp((char *)r_netid, "tcp", 3)) {
272 dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__);
273 goto out_err;
274 }
275
276 /* ipv6 length plus port is legal */
277 if (rlen > INET6_ADDRSTRLEN + 8) {
278 dprintk("%s: Invalid address, length %d\n", __func__,
279 rlen);
280 goto out_err;
281 }
282 buf = kmalloc(rlen + 1, GFP_KERNEL);
283 if (!buf) {
284 dprintk("%s: Not enough memory\n", __func__);
285 goto out_err;
286 }
287 buf[rlen] = '\0';
288 memcpy(buf, r_addr, rlen);
289
290 /* replace the port dots with dashes for the in4_pton() delimiter*/
291 for (i = 0; i < 2; i++) {
292 char *res = strrchr(buf, '.');
293 if (!res) {
294 dprintk("%s: Failed finding expected dots in port\n",
295 __func__);
296 goto out_free;
297 }
298 *res = '-';
299 }
300
301 /* Currently only support ipv4 address */
302 if (in4_pton(buf, rlen, (u8 *)&ip_addr, '-', &ipend) == 0) {
303 dprintk("%s: Only ipv4 addresses supported\n", __func__);
304 goto out_free;
305 }
306
307 /* port */
308 pstr = ipend;
309 sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]);
310 port = htons((tmp[0] << 8) | (tmp[1]));
311
312 ds = nfs4_pnfs_ds_add(inode, ip_addr, port);
313 dprintk("%s: Decoded address and port %s\n", __func__, buf);
314 out_free:
315 kfree(buf);
316 out_err:
317 return ds;
318 }
319
320 /* Decode opaque device data and return the result */
321 static struct nfs4_file_layout_dsaddr*
322 decode_device(struct inode *ino, struct pnfs_device *pdev)
323 {
324 int i, dummy;
325 u32 cnt, num;
326 u8 *indexp;
327 __be32 *p = (__be32 *)pdev->area, *indicesp;
328 struct nfs4_file_layout_dsaddr *dsaddr;
329
330 /* Get the stripe count (number of stripe index) */
331 cnt = be32_to_cpup(p++);
332 dprintk("%s stripe count %d\n", __func__, cnt);
333 if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) {
334 printk(KERN_WARNING "%s: stripe count %d greater than "
335 "supported maximum %d\n", __func__,
336 cnt, NFS4_PNFS_MAX_STRIPE_CNT);
337 goto out_err;
338 }
339
340 /* Check the multipath list count */
341 indicesp = p;
342 p += XDR_QUADLEN(cnt << 2);
343 num = be32_to_cpup(p++);
344 dprintk("%s ds_num %u\n", __func__, num);
345 if (num > NFS4_PNFS_MAX_MULTI_CNT) {
346 printk(KERN_WARNING "%s: multipath count %d greater than "
347 "supported maximum %d\n", __func__,
348 num, NFS4_PNFS_MAX_MULTI_CNT);
349 goto out_err;
350 }
351 dsaddr = kzalloc(sizeof(*dsaddr) +
352 (sizeof(struct nfs4_pnfs_ds *) * (num - 1)),
353 GFP_KERNEL);
354 if (!dsaddr)
355 goto out_err;
356
357 dsaddr->stripe_indices = kzalloc(sizeof(u8) * cnt, GFP_KERNEL);
358 if (!dsaddr->stripe_indices)
359 goto out_err_free;
360
361 dsaddr->stripe_count = cnt;
362 dsaddr->ds_num = num;
363
364 memcpy(&dsaddr->deviceid.de_id, &pdev->dev_id, sizeof(pdev->dev_id));
365
366 /* Go back an read stripe indices */
367 p = indicesp;
368 indexp = &dsaddr->stripe_indices[0];
369 for (i = 0; i < dsaddr->stripe_count; i++) {
370 *indexp = be32_to_cpup(p++);
371 if (*indexp >= num)
372 goto out_err_free;
373 indexp++;
374 }
375 /* Skip already read multipath list count */
376 p++;
377
378 for (i = 0; i < dsaddr->ds_num; i++) {
379 int j;
380
381 dummy = be32_to_cpup(p++); /* multipath count */
382 if (dummy > 1) {
383 printk(KERN_WARNING
384 "%s: Multipath count %d not supported, "
385 "skipping all greater than 1\n", __func__,
386 dummy);
387 }
388 for (j = 0; j < dummy; j++) {
389 if (j == 0) {
390 dsaddr->ds_list[i] = decode_and_add_ds(&p, ino);
391 if (dsaddr->ds_list[i] == NULL)
392 goto out_err_free;
393 } else {
394 u32 len;
395 /* skip extra multipath */
396 len = be32_to_cpup(p++);
397 p += XDR_QUADLEN(len);
398 len = be32_to_cpup(p++);
399 p += XDR_QUADLEN(len);
400 continue;
401 }
402 }
403 }
404 return dsaddr;
405
406 out_err_free:
407 nfs4_fl_free_deviceid(dsaddr);
408 out_err:
409 dprintk("%s ERROR: returning NULL\n", __func__);
410 return NULL;
411 }
412
413 /*
414 * Decode the opaque device specified in 'dev'
415 * and add it to the list of available devices.
416 * If the deviceid is already cached, nfs4_add_deviceid will return
417 * a pointer to the cached struct and throw away the new.
418 */
419 static struct nfs4_file_layout_dsaddr*
420 decode_and_add_device(struct inode *inode, struct pnfs_device *dev)
421 {
422 struct nfs4_file_layout_dsaddr *dsaddr;
423 struct pnfs_deviceid_node *d;
424
425 dsaddr = decode_device(inode, dev);
426 if (!dsaddr) {
427 printk(KERN_WARNING "%s: Could not decode or add device\n",
428 __func__);
429 return NULL;
430 }
431
432 d = pnfs_add_deviceid(NFS_SERVER(inode)->nfs_client->cl_devid_cache,
433 &dsaddr->deviceid);
434
435 return container_of(d, struct nfs4_file_layout_dsaddr, deviceid);
436 }
437
438 /*
439 * Retrieve the information for dev_id, add it to the list
440 * of available devices, and return it.
441 */
442 struct nfs4_file_layout_dsaddr *
443 get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id)
444 {
445 struct pnfs_device *pdev = NULL;
446 u32 max_resp_sz;
447 int max_pages;
448 struct page **pages = NULL;
449 struct nfs4_file_layout_dsaddr *dsaddr = NULL;
450 int rc, i;
451 struct nfs_server *server = NFS_SERVER(inode);
452
453 /*
454 * Use the session max response size as the basis for setting
455 * GETDEVICEINFO's maxcount
456 */
457 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
458 max_pages = max_resp_sz >> PAGE_SHIFT;
459 dprintk("%s inode %p max_resp_sz %u max_pages %d\n",
460 __func__, inode, max_resp_sz, max_pages);
461
462 pdev = kzalloc(sizeof(struct pnfs_device), GFP_KERNEL);
463 if (pdev == NULL)
464 return NULL;
465
466 pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL);
467 if (pages == NULL) {
468 kfree(pdev);
469 return NULL;
470 }
471 for (i = 0; i < max_pages; i++) {
472 pages[i] = alloc_page(GFP_KERNEL);
473 if (!pages[i])
474 goto out_free;
475 }
476
477 /* set pdev->area */
478 pdev->area = vmap(pages, max_pages, VM_MAP, PAGE_KERNEL);
479 if (!pdev->area)
480 goto out_free;
481
482 memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id));
483 pdev->layout_type = LAYOUT_NFSV4_1_FILES;
484 pdev->pages = pages;
485 pdev->pgbase = 0;
486 pdev->pglen = PAGE_SIZE * max_pages;
487 pdev->mincount = 0;
488
489 rc = nfs4_proc_getdeviceinfo(server, pdev);
490 dprintk("%s getdevice info returns %d\n", __func__, rc);
491 if (rc)
492 goto out_free;
493
494 /*
495 * Found new device, need to decode it and then add it to the
496 * list of known devices for this mountpoint.
497 */
498 dsaddr = decode_and_add_device(inode, pdev);
499 out_free:
500 if (pdev->area != NULL)
501 vunmap(pdev->area);
502 for (i = 0; i < max_pages; i++)
503 __free_page(pages[i]);
504 kfree(pages);
505 kfree(pdev);
506 dprintk("<-- %s dsaddr %p\n", __func__, dsaddr);
507 return dsaddr;
508 }
509
510 struct nfs4_file_layout_dsaddr *
511 nfs4_fl_find_get_deviceid(struct nfs_client *clp, struct nfs4_deviceid *id)
512 {
513 struct pnfs_deviceid_node *d;
514
515 d = pnfs_find_get_deviceid(clp->cl_devid_cache, id);
516 return (d == NULL) ? NULL :
517 container_of(d, struct nfs4_file_layout_dsaddr, deviceid);
518 }
519
520 /*
521 * Want res = (offset - layout->pattern_offset)/ layout->stripe_unit
522 * Then: ((res + fsi) % dsaddr->stripe_count)
523 */
524 u32
525 nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset)
526 {
527 struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
528 u64 tmp;
529
530 tmp = offset - flseg->pattern_offset;
531 do_div(tmp, flseg->stripe_unit);
532 tmp += flseg->first_stripe_index;
533 return do_div(tmp, flseg->dsaddr->stripe_count);
534 }
535
536 u32
537 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j)
538 {
539 return FILELAYOUT_LSEG(lseg)->dsaddr->stripe_indices[j];
540 }
541
542 struct nfs_fh *
543 nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
544 {
545 struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
546 u32 i;
547
548 if (flseg->stripe_type == STRIPE_SPARSE) {
549 if (flseg->num_fh == 1)
550 i = 0;
551 else if (flseg->num_fh == 0)
552 /* Use the MDS OPEN fh set in nfs_read_rpcsetup */
553 return NULL;
554 else
555 i = nfs4_fl_calc_ds_index(lseg, j);
556 } else
557 i = j;
558 return flseg->fh_array[i];
559 }
560
561 struct nfs4_pnfs_ds *
562 nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
563 {
564 struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr;
565 struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx];
566
567 if (ds == NULL) {
568 printk(KERN_ERR "%s: No data server for offset index %d\n",
569 __func__, ds_idx);
570 return NULL;
571 }
572
573 if (!ds->ds_clp) {
574 int err;
575
576 err = nfs4_ds_connect(NFS_SERVER(lseg->pls_layout->plh_inode),
577 dsaddr->ds_list[ds_idx]);
578 if (err) {
579 printk(KERN_ERR "%s nfs4_ds_connect error %d\n",
580 __func__, err);
581 return NULL;
582 }
583 }
584 return ds;
585 }